deriva-ml 1.14.33__tar.gz → 1.14.35__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/PKG-INFO +4 -5
  2. deriva_ml-1.14.35/README.md +11 -0
  3. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/pyproject.toml +8 -2
  4. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/workflow.py +3 -5
  5. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/feature.py +1 -0
  6. deriva_ml-1.14.35/src/deriva_ml/install_kernel.py +46 -0
  7. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/run_notebook.py +4 -4
  8. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/PKG-INFO +4 -5
  9. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/SOURCES.txt +1 -0
  10. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/entry_points.txt +1 -0
  11. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/conftest.py +11 -1
  12. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/test_execution.py +17 -6
  13. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/workflow-test.ipynb +12 -17
  14. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/workflow-test.py +0 -1
  15. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/feature/test_features.py +32 -16
  16. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/test_utils.py +69 -1
  17. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/uv.lock +673 -645
  18. deriva_ml-1.14.33/README.md +0 -12
  19. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.github/release-drafter.yml +0 -0
  20. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.github/workflows/publish-docs.yml +0 -0
  21. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.github/workflows/release.yml +0 -0
  22. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/.gitignore +0 -0
  23. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/LICENSE +0 -0
  24. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/.DS_Store +0 -0
  25. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  26. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  27. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  28. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  29. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  30. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  31. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/ERD.png +0 -0
  32. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/Launcher.png +0 -0
  33. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/copy_minid.png +0 -0
  34. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/deriva-logo.png +0 -0
  35. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/deriva-ml.pdf +0 -0
  36. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/assets/sharing-at-home.pdf +0 -0
  37. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/dataset.md +0 -0
  38. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/dataset_aux_classes.md +0 -0
  39. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/dataset_bag.md +0 -0
  40. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/deriva_definitions.md +0 -0
  41. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/deriva_ml_base.md +0 -0
  42. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/deriva_model.md +0 -0
  43. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/execution.md +0 -0
  44. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/execution_configuration.md +0 -0
  45. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/feature.md +0 -0
  46. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/code-docs/upload.md +0 -0
  47. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/index.md +0 -0
  48. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/release-notes.md +0 -0
  49. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/datasets.md +0 -0
  50. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/deriva_ml_structure.md +0 -0
  51. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/execution-configuration.md +0 -0
  52. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/file-assets.md +0 -0
  53. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/identifiers.md +0 -0
  54. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/install.md +0 -0
  55. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/notebooks.md +0 -0
  56. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/docs/user-guide/overview.md +0 -0
  57. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/mkdocs.yml +0 -0
  58. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/release.sh +0 -0
  59. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/setup.cfg +0 -0
  60. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/__init__.py +0 -0
  61. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/__init__.py +0 -0
  62. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/base.py +0 -0
  63. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/constants.py +0 -0
  64. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/definitions.py +0 -0
  65. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/enums.py +0 -0
  66. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/ermrest.py +0 -0
  67. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/exceptions.py +0 -0
  68. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/core/filespec.py +0 -0
  69. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/__init__.py +0 -0
  70. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/aux_classes.py +0 -0
  71. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/dataset.py +0 -0
  72. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/dataset_bag.py +0 -0
  73. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/history.py +0 -0
  74. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/dataset/upload.py +0 -0
  75. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/demo_catalog.py +0 -0
  76. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/__init__.py +0 -0
  77. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/environment.py +0 -0
  78. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/execution.py +0 -0
  79. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/execution/execution_configuration.py +0 -0
  80. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/__init__.py +0 -0
  81. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/catalog.py +0 -0
  82. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/database.py +0 -0
  83. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/model/sql_mapper.py +0 -0
  84. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/__init__.py +0 -0
  85. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/annotations.py +0 -0
  86. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/check_schema.py +0 -0
  87. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/create_schema.py +0 -0
  88. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  89. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/policy.json +0 -0
  90. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  91. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  92. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/requires.txt +0 -0
  93. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/src/deriva_ml.egg-info/top_level.txt +0 -0
  94. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/__init__.py +0 -0
  95. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/__init__.py +0 -0
  96. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/test_basic_tables.py +0 -0
  97. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/test_file.py +0 -0
  98. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/core/test_vocabulary.py +0 -0
  99. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/__init__.py +0 -0
  100. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/demo-catalog-schema.json +0 -0
  101. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/deriva-ml-reference.json +0 -0
  102. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  103. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_dataset_export.py +0 -0
  104. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_dataset_version.py +0 -0
  105. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_datasets.py +0 -0
  106. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/dataset/test_download.py +0 -0
  107. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/execution/__init__.py +0 -0
  108. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/model/__init__.py +0 -0
  109. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/model/test_database.py +0 -0
  110. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/model/test_models.py +0 -0
  111. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/test-files/execution-parameters.json +0 -0
  112. {deriva_ml-1.14.33 → deriva_ml-1.14.35}/tests/test-files/notebook-parameters.json +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.33
3
+ Version: 1.14.35
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
25
25
  using a deriva catalog.
26
26
 
27
27
 
28
- ## Installing the GitHub CLI
28
+ Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
29
29
 
30
- The script release.sh will create a new release tag in GitHub. This script requires the
31
- GitHUB CLI be installed.
30
+ To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suit your requirements.
32
31
 
33
- See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
34
32
 
33
+ ## References
@@ -0,0 +1,11 @@
1
+ # DerivaML
2
+ Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
3
+ using a deriva catalog.
4
+
5
+
6
+ Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
7
+
8
+ To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suit your requirements.
9
+
10
+
11
+ ## References
@@ -27,6 +27,8 @@ deriva-ml-table-comments-utils = "deriva_ml.schema_setup.table_comments_utils:ma
27
27
  deriva-ml-create-schema = "deriva_ml.schema_setup.create_schema:main"
28
28
  deriva-ml-alter-annotation = "deriva_ml.schema_setup.alter_annotation:main"
29
29
  deriva-ml-run-notebook = "deriva_ml.run_notebook:main"
30
+ deriva-ml-install-kernel = "deriva_ml.install_kernel:main"
31
+
30
32
  deriva-ml-check-catalog-schema = "deriva_ml.schema.check_schema:main"
31
33
 
32
34
  [project.optional-dependencies]
@@ -64,6 +66,9 @@ setup_hooks = []
64
66
  pre_commit_hooks = []
65
67
  post_commit_hooks = []
66
68
 
69
+ [tool.pytest]
70
+ mock_use_standalone_module = true
71
+
67
72
  [tool.pytest.ini_options]
68
73
  testpaths = ["tests"]
69
74
  python_files = ["test_*.py"]
@@ -74,8 +79,8 @@ addopts = "-v --import-mode=importlib"
74
79
  [tool.ruff]
75
80
  line-length = 120
76
81
  target-version = "py310"
77
- select = ["E", "F", "I", "PTH"]
78
- ignore = []
82
+ lint.select = ["E", "F", "I", "PTH"]
83
+ lint.ignore = []
79
84
 
80
85
  [tool.ruff.format]
81
86
  # Like Black, use double quotes for strings.
@@ -97,6 +102,7 @@ dev = [
97
102
  "mkdocstrings[python]",
98
103
  "mkdocs-material",
99
104
  "pytest>=8.4.1",
105
+ "pytest-mock",
100
106
  "pytest-coverage>=0.0",
101
107
  "ruff"
102
108
  ]
@@ -100,9 +100,6 @@ class Workflow(BaseModel):
100
100
  - DERIVA_ML_WORKFLOW_CHECKSUM: Override the computed checksum
101
101
 
102
102
  Args:
103
- name: Human-readable name for the workflow.
104
- workflow_type: Type of workflow (must be a vocabulary term).
105
- description: Optional description of workflow purpose.
106
103
 
107
104
  Returns:
108
105
  Workflow: New workflow instance with detected Git information.
@@ -240,6 +237,7 @@ class Workflow(BaseModel):
240
237
  """
241
238
 
242
239
  server, session = Workflow._get_notebook_session()
240
+
243
241
  if server and session:
244
242
  relative_path = session["notebook"]["path"]
245
243
  # Join the notebook directory with the relative path
@@ -321,8 +319,8 @@ class Workflow(BaseModel):
321
319
  # Being called from the command line interpreter.
322
320
  filename = Path.cwd() / Path("REPL")
323
321
  # Get the caller's filename, which is two up the stack from here.
324
- else:
325
- raise DerivaMLException("Looking for caller failed") # Stack is too shallow
322
+ else:
323
+ raise DerivaMLException("Looking for caller failed") # Stack is too shallow
326
324
  return filename, is_notebook
327
325
 
328
326
  @staticmethod
@@ -52,6 +52,7 @@ class FeatureRecord(BaseModel):
52
52
 
53
53
  class Config:
54
54
  arbitrary_types_allowed = True
55
+ extra = "forbid"
55
56
 
56
57
  @classmethod
57
58
  def feature_columns(cls) -> set[Column]:
@@ -0,0 +1,46 @@
1
+ # your_pkg/install_kernel.py
2
+ import sys
3
+ import re
4
+ from importlib import metadata
5
+ from ipykernel.kernelspec import install as install_kernel
6
+
7
+ def _dist_name_for_this_package() -> str:
8
+ """
9
+ Try to resolve the distribution name that provides this package.
10
+ Works in editable installs and wheels.
11
+ """
12
+ # Top-level package name of this module (your_pkg)
13
+ top_pkg = __name__.split(".")[0]
14
+
15
+ # Map top-level packages -> distributions
16
+ pkg_to_dists = metadata.packages_distributions()
17
+ dists = pkg_to_dists.get(top_pkg) or []
18
+
19
+ # Fall back to project name in METADATA when mapping isn't available
20
+ dist_name = dists[0] if dists else metadata.metadata(top_pkg).get("Name", top_pkg)
21
+ return dist_name
22
+
23
def _normalize_kernel_name(name: str) -> str:
    """Convert a distribution name into a simple Jupyter kernel directory name.

    The input is stripped of surrounding whitespace and lower-cased, then every
    run of characters outside ``[a-z0-9._-]`` is collapsed into a single ``-``.

    Args:
        name: Raw name to normalize, typically a distribution name.

    Returns:
        The normalized kernel name.

    Raises:
        ValueError: If ``name`` is empty or only whitespace, because an empty
            kernel name cannot identify a kernel directory.
    """
    normalized = re.sub(r"[^a-z0-9._-]+", "-", name.strip().lower())
    if not normalized:
        raise ValueError(f"Cannot derive a Jupyter kernel name from {name!r}")
    return normalized
30
+
31
def main() -> None:
    """Install a Jupyter kernel for this package into the active environment.

    The kernel name is the normalized distribution name and the display name is
    ``Python (<distribution name>)``. The spec is written under ``sys.prefix``
    rather than the per-user kernel directory.
    """
    distribution = _dist_name_for_this_package()
    spec_name = _normalize_kernel_name(distribution)
    label = f"Python ({distribution})"
    target_prefix = sys.prefix

    # user=False together with an explicit prefix places the spec under
    # <prefix>/share/jupyter/kernels/ of the current environment.
    install_kernel(
        user=False,
        kernel_name=spec_name,
        display_name=label,
        prefix=target_prefix,
    )
    print(f"Installed Jupyter kernel '{spec_name}' with display name '{label}' under {target_prefix!s}")
44
+
45
+ if __name__ == "__main__":
46
+ main()
@@ -95,7 +95,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
95
95
  exit(1)
96
96
 
97
97
  os.environ["DERIVA_HOST"] = args.host
98
- os.environ["DERIVA_CATALOG_ID"] = args.catalog
98
+ os.environ["DERIVA_CATALOG"] = args.catalog
99
99
 
100
100
  # Create a workflow instance for this specific version of the script.
101
101
  # Return an existing workflow if one is found.
@@ -106,14 +106,14 @@ class DerivaMLRunNotebookCLI(BaseCLI):
106
106
  return
107
107
  else:
108
108
  notebook_parameters = (
109
- {"host": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
110
- | {k: v["default"] for k, v in notebook_parameters.items()}
109
+ {k: v["default"] for k, v in notebook_parameters.items()}
110
+ | {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
111
111
  | parameters
112
112
  )
113
113
  print(f"Running notebook {notebook_file.name} with parameters:")
114
114
  for param, value in notebook_parameters.items():
115
115
  print(f" {param}:{value}")
116
- self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
116
+ self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
117
117
 
118
118
  def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
119
119
  url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.33
3
+ Version: 1.14.35
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
25
25
  using a deriva catalog.
26
26
 
27
27
 
28
- ## Installing the GitHub CLI
28
+ Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
29
29
 
30
- The script release.sh will create a new release tag in GitHub. This script requires the
31
- GitHUB CLI be installed.
30
+ To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template), and modify it to suit your requirements.
32
31
 
33
- See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
34
32
 
33
+ ## References
@@ -44,6 +44,7 @@ docs/user-guide/overview.md
44
44
  src/deriva_ml/__init__.py
45
45
  src/deriva_ml/demo_catalog.py
46
46
  src/deriva_ml/feature.py
47
+ src/deriva_ml/install_kernel.py
47
48
  src/deriva_ml/run_notebook.py
48
49
  src/deriva_ml.egg-info/PKG-INFO
49
50
  src/deriva_ml.egg-info/SOURCES.txt
@@ -2,5 +2,6 @@
2
2
  deriva-ml-alter-annotation = deriva_ml.schema_setup.alter_annotation:main
3
3
  deriva-ml-check-catalog-schema = deriva_ml.schema.check_schema:main
4
4
  deriva-ml-create-schema = deriva_ml.schema_setup.create_schema:main
5
+ deriva-ml-install-kernel = deriva_ml.install_kernel:main
5
6
  deriva-ml-run-notebook = deriva_ml.run_notebook:main
6
7
  deriva-ml-table-comments-utils = deriva_ml.schema_setup.table_comments_utils:main
@@ -5,7 +5,7 @@ Pytest configuration and shared fixtures.
5
5
  import os
6
6
 
7
7
  import pytest
8
- from test_utils import MLCatalog, MLDatasetCatalog
8
+ from test_utils import MLCatalog, MLDatasetCatalog, create_jupyter_kernel, destroy_jupyter_kernel
9
9
 
10
10
  from deriva_ml import DerivaML
11
11
  from deriva_ml.demo_catalog import (
@@ -61,6 +61,16 @@ def dataset_test(catalog_with_datasets):
61
61
  return catalog_with_datasets
62
62
 
63
63
 
64
@pytest.fixture(scope="function")
def notebook_test(deriva_catalog, tmp_path):
    """Yield a DerivaML instance for notebook tests, with a temporary Jupyter kernel installed.

    Setup resets the demo catalog and installs a kernel named ``test_kernel``
    under ``tmp_path``. Teardown resets the catalog again and removes the
    kernel; both steps are guarded so the kernel is removed even if building
    the DerivaML instance or resetting the catalog fails.
    """
    deriva_catalog.reset_demo_catalog()
    create_jupyter_kernel("test_kernel", tmp_path)
    try:
        yield DerivaML(deriva_catalog.hostname, deriva_catalog.catalog_id, use_minid=False, working_dir=tmp_path)
    finally:
        print("Resetting catalog... ", end="")
        try:
            deriva_catalog.reset_demo_catalog()
        finally:
            # Always drop the temporary kernel, even when the catalog reset raises.
            destroy_jupyter_kernel("test_kernel")
72
+
73
+
64
74
  @pytest.fixture(scope="function")
65
75
  def test_ml_demo_catalog(ml_catalog, tmp_path):
66
76
  # reset_demo_catalog(ml_catalog.catalog)
@@ -3,6 +3,7 @@ Tests for the execution module.
3
3
  """
4
4
 
5
5
  import subprocess
6
+ from pathlib import Path
6
7
  from tempfile import TemporaryDirectory
7
8
 
8
9
  from deriva_ml import (
@@ -23,20 +24,23 @@ class TestWorkflow:
23
24
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
24
25
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
25
26
  print("Running workflow-test.py ...")
27
+ workflow_script = Path(__file__).parent / "workflow-test.py"
28
+
26
29
  workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
27
30
  workflows = list(workflow_table.entities().fetch())
28
31
  assert 0 == len(workflows)
29
32
  result = subprocess.run(
30
33
  [
31
34
  "python",
32
- "execution/workflow-test.py",
35
+ workflow_script.as_posix(),
33
36
  ml_instance.catalog.deriva_server.server,
34
37
  ml_instance.catalog_id,
35
38
  ],
36
39
  capture_output=True,
37
40
  text=True,
38
41
  )
39
-
42
+ print(result.stdout)
43
+ print(result.stderr)
40
44
  workflows = list(workflow_table.entities().fetch())
41
45
  assert 1 == len(workflows)
42
46
  workflow_rid = workflows[0]["RID"]
@@ -50,18 +54,22 @@ class TestWorkflow:
50
54
  result = subprocess.run(
51
55
  [
52
56
  "python",
53
- "execution/workflow-test.py",
57
+ workflow_script.as_posix(),
54
58
  ml_instance.catalog.deriva_server.server,
55
59
  ml_instance.catalog_id,
56
60
  ],
57
61
  capture_output=True,
58
62
  text=True,
59
63
  )
64
+ print(result.stdout)
65
+ print(result.stderr)
60
66
  new_workflow = result.stdout.strip()
61
67
  assert new_workflow == workflow_rid
62
68
 
63
- def test_workflow_creation_notebook(self, test_ml):
64
- ml_instance = test_ml
69
+ def test_workflow_creation_notebook(self, notebook_test):
70
+ ml_instance = notebook_test
71
+
72
+ notebook_path = Path(__file__).parent / "workflow-test.ipynb" # directory where this test lives
65
73
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
66
74
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
67
75
  workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
@@ -69,14 +77,17 @@ class TestWorkflow:
69
77
  assert 0 == len(workflows)
70
78
 
71
79
  print("Running notebook...")
80
+
72
81
  result = subprocess.run(
73
82
  [
74
83
  "deriva-ml-run-notebook",
75
- "execution/workflow-test.ipynb",
84
+ notebook_path.as_posix(),
76
85
  "--host",
77
86
  ml_instance.catalog.deriva_server.server,
78
87
  "--catalog",
79
88
  ml_instance.catalog_id,
89
+ "--kernel",
90
+ "test_kernel",
80
91
  "--log-output",
81
92
  ],
82
93
  capture_output=True,
@@ -18,9 +18,7 @@
18
18
  "outputs": [],
19
19
  "source": [
20
20
  "from deriva_ml import DerivaML, MLVocab as vc\n",
21
- "import os\n",
22
- "import logging\n",
23
- "logger = logging.getLogger()"
21
+ "import os"
24
22
  ]
25
23
  },
26
24
  {
@@ -42,8 +40,8 @@
42
40
  },
43
41
  "outputs": [],
44
42
  "source": [
45
- "hostname = None\n",
46
- "catalog_id = None"
43
+ "host = None\n",
44
+ "catalog = None"
47
45
  ]
48
46
  },
49
47
  {
@@ -54,24 +52,21 @@
54
52
  "outputs": [],
55
53
  "source": [
56
54
  "# Modify these to your desired server and catalog.\n",
57
- "hostname = hostname or os.environ.get(\"DERIVA_HOST\")\n",
58
- "catalog_id = catalog_id or os.environ.get(\"DERIVA_CATALOG_ID\")\n",
55
+ "host = host or os.environ.get(\"DERIVA_HOST\")\n",
56
+ "catalog = catalog or os.environ.get(\"DERIVA_CATALOG\")\n",
59
57
  "\n",
60
58
  "# Change this line to call the domain specific class derived from DerivaML\n",
61
- "ml_instance = DerivaML(hostname, catalog_id)\n",
62
- "logger.info(\"Got ML instance:\")\n",
59
+ "ml_instance = DerivaML(host, catalog)\n",
63
60
  "\n",
64
61
  "ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
65
62
  "ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
66
- "print(\"Added terms to ML instance\")\n",
63
+ "\n",
67
64
  "api_workflow = ml_instance.create_workflow(\n",
68
- " name=\"Test Workflow One\",\n",
69
- " workflow_type=\"Test Workflow\",\n",
70
- " description=\"A test operation\",\n",
71
- ")\n",
72
- "logger.info(f\"URL: {api_workflow.url}\")\n",
73
- "rid = ml_instance.add_workflow(api_workflow)\n",
74
- "logger.info(f\"RID {rid}\")\n"
65
+ " name=\"Test Workflow One\",\n",
66
+ " workflow_type=\"Test Workflow\",\n",
67
+ " description=\"A test operation\",\n",
68
+ " )\n",
69
+ "rid = ml_instance.add_workflow(api_workflow)"
75
70
  ]
76
71
  }
77
72
  ],
@@ -6,7 +6,6 @@ from deriva_ml import MLVocab as vc
6
6
  hostname = sys.argv[1]
7
7
  catalog_id = sys.argv[2]
8
8
 
9
-
10
9
  ml_instance = DerivaML(hostname, catalog_id)
11
10
 
12
11
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
@@ -2,9 +2,8 @@
2
2
  Tests for feature functionality.
3
3
  """
4
4
 
5
- from unittest.mock import Mock
6
-
7
5
  import pytest
6
+ from pydantic import ValidationError
8
7
 
9
8
  from deriva_ml import (
10
9
  BuiltinTypes,
@@ -21,14 +20,14 @@ from deriva_ml.feature import FeatureRecord
21
20
  class TestFeatureRecord:
22
21
  """Test cases for the FeatureRecord base class."""
23
22
 
24
- def test_feature_record_creation(self):
23
+ def test_feature_record_creation(self, mocker):
25
24
  """Test basic FeatureRecord creation."""
26
25
  # Create a mock feature
27
- mock_feature = Mock()
28
- mock_feature.feature_columns = {Mock(name="value"), Mock(name="confidence")}
29
- mock_feature.asset_columns = {Mock(name="image_file")}
30
- mock_feature.term_columns = {Mock(name="category")}
31
- mock_feature.value_columns = {Mock(name="score")}
26
+ mock_feature = mocker.Mock()
27
+ mock_feature.feature_columns = {mocker.Mock(name="value"), mocker.Mock(name="confidence")}
28
+ mock_feature.asset_columns = {mocker.Mock(name="image_file")}
29
+ mock_feature.term_columns = {mocker.Mock(name="category")}
30
+ mock_feature.value_columns = {mocker.Mock(name="score")}
32
31
 
33
32
  # Create a test class that inherits from FeatureRecord
34
33
  class TestFeature(FeatureRecord):
@@ -58,17 +57,17 @@ class TestFeatureRecord:
58
57
  assert record.category == "good"
59
58
  assert record.score == 0.8
60
59
 
61
- def test_feature_record_column_methods(self):
60
+ def test_feature_record_column_methods(self, mocker):
62
61
  """Test the column access methods of FeatureRecord."""
63
62
  # Create mock columns
64
- value_col = Mock(name="value")
65
- confidence_col = Mock(name="confidence")
66
- asset_col = Mock(name="image_file")
67
- term_col = Mock(name="category")
68
- value_only_col = Mock(name="score")
63
+ value_col = mocker.Mock(name="value")
64
+ confidence_col = mocker.Mock(name="confidence")
65
+ asset_col = mocker.Mock(name="image_file")
66
+ term_col = mocker.Mock(name="category")
67
+ value_only_col = mocker.Mock(name="score")
69
68
 
70
69
  # Create a mock feature
71
- mock_feature = Mock()
70
+ mock_feature = mocker.Mock()
72
71
  mock_feature.feature_columns = {value_col, confidence_col, asset_col, term_col, value_only_col}
73
72
  mock_feature.asset_columns = {asset_col}
74
73
  mock_feature.term_columns = {term_col}
@@ -141,6 +140,22 @@ class TestFeatures:
141
140
  with pytest.raises(DerivaMLException):
142
141
  ml_instance.lookup_feature("Subject", "SubjectHealth1")
143
142
 
143
+ def test_feature_record(self, dataset_test, tmp_path):
144
+ ml_instance = DerivaML(
145
+ dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
146
+ )
147
+ SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
148
+ print(SubjectHealthFeature.model_fields.keys())
149
+
150
+ print(SubjectHealthFeature.feature_columns())
151
+
152
+ with pytest.raises(ValidationError):
153
+ SubjectHealthFeature(Subject="SubjectRID", Health="Good", Scale=23, Foo="Bar")
154
+ print(SubjectHealthFeature.value_columns())
155
+ print(SubjectHealthFeature.term_columns())
156
+ print(SubjectHealthFeature.asset_columns())
157
+ print(SubjectHealthFeature.feature_columns())
158
+
144
159
  def test_add_feature(self, dataset_test, tmp_path):
145
160
  ml_instance = DerivaML(
146
161
  dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
@@ -166,7 +181,8 @@ class TestFeatures:
166
181
 
167
182
  with feature_execution.execute() as exe:
168
183
  SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
169
- exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], Health="Good", Scale=23)])
184
+ print(SubjectHealthFeature.feature_columns())
185
+ exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], SubjectHealth="Sick", Scale=23)])
170
186
 
171
187
  feature_execution.upload_execution_outputs()
172
188
  features = list(ml_instance.list_feature_values("Subject", "Health"))
@@ -1,8 +1,12 @@
1
+ import os
2
+ import shutil
1
3
  from tempfile import TemporaryDirectory
2
4
  from urllib.parse import quote as urlquote
3
5
 
4
6
  from demo_catalog import create_demo_features
5
7
  from deriva.core.datapath import DataPathException
8
+ from ipykernel.kernelspec import install
9
+ from jupyter_client.kernelspec import KernelSpecManager
6
10
 
7
11
  from deriva_ml import DerivaML
8
12
  from deriva_ml.demo_catalog import (
@@ -42,7 +46,14 @@ class MLCatalog:
42
46
  pb = self.catalog.getPathBuilder()
43
47
  ml_path = pb.schemas["deriva-ml"]
44
48
  domain_path = pb.schemas[self.domain_schema]
45
- for t in ["Dataset_Execution", "Dataset_Version", "Dataset_Dataset", "Workflow", "Workflow_Execution"]:
49
+ for t in [
50
+ "Dataset_Execution",
51
+ "Dataset_Version",
52
+ "Dataset_Dataset",
53
+ "Execution",
54
+ "Workflow_Execution",
55
+ "Workflow",
56
+ ]:
46
57
  try:
47
58
  ml_path.tables[t].path.delete()
48
59
  except DataPathException:
@@ -110,3 +121,60 @@ class MLDatasetCatalog:
110
121
  with TemporaryDirectory() as tmp_dir:
111
122
  ml_instance = DerivaML(self.catalog.hostname, self.catalog.catalog_id, working_dir=tmp_dir, use_minid=False)
112
123
  self.dataset_description: DatasetDescription = create_demo_datasets(ml_instance)
124
+
125
+
126
def create_jupyter_kernel(name: str, kernel_dir, display_name: str | None = None, user: bool = True) -> None:
    """
    Create and install a Jupyter kernel spec using ipykernel.

    The spec is installed with ``kernel_dir`` as the prefix, and the process
    environment variable ``JUPYTER_PATH`` is set to ``<kernel_dir>/share/jupyter``.
    The variable is overwritten and not restored afterwards.

    Parameters
    ----------
    name : str
        The internal name of the kernel (used in `--kernel`).
    kernel_dir : path-like
        Prefix directory passed to ipykernel's ``install`` as ``prefix``.
    display_name : str, optional
        The label shown in Jupyter's kernel chooser (defaults to name).
    user : bool, default=True
        Currently unused: the value is not forwarded to ipykernel, so the
        spec is always installed under ``kernel_dir``.
    """
    if display_name is None:
        display_name = name

    # Point Jupyter's data search path at the prefix the kernel is installed into.
    os.environ["JUPYTER_PATH"] = f"{kernel_dir}/share/jupyter"

    print(f"Installing Jupyter kernel '{name}' with display name '{display_name}'")
    install(
        kernel_name=name,
        display_name=display_name,
        prefix=kernel_dir,  # install under the caller-supplied directory
    )
    print("✅ Kernel installed successfully.")
152
+
153
+
154
def destroy_jupyter_kernel(name: str, user: bool = True) -> None:
    """
    Remove a Jupyter kernel spec by name.

    The kernel is looked up among the specs reported by
    ``KernelSpecManager().find_kernel_specs()`` and its directory is deleted.
    A missing kernel or a failed deletion is reported on stdout rather than
    raised.

    Parameters
    ----------
    name : str
        The internal kernel name (the same name used in create_jupyter_kernel).
    user : bool, default=True
        Currently unused: the kernel is removed from wherever the kernel spec
        manager finds it.
    """
    kernels = KernelSpecManager().find_kernel_specs()

    kernel_path = kernels.get(name)
    if kernel_path is None:
        print(f"❌ Kernel '{name}' not found.")
        return

    print(f"Removing kernel '{name}' at {kernel_path}")

    try:
        shutil.rmtree(kernel_path)
    except OSError as e:
        # Best-effort cleanup: report filesystem errors instead of failing the caller.
        print(f"⚠️ Failed to remove kernel '{name}': {e}")
    else:
        print(f"✅ Kernel '{name}' removed successfully.")