deriva-ml 1.14.33__tar.gz → 1.14.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/PKG-INFO +4 -5
  2. deriva_ml-1.14.34/README.md +11 -0
  3. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/pyproject.toml +4 -0
  4. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/feature.py +1 -0
  5. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/run_notebook.py +4 -4
  6. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/PKG-INFO +4 -5
  7. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/conftest.py +11 -1
  8. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/execution/test_execution.py +17 -6
  9. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/execution/workflow-test.ipynb +12 -17
  10. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/execution/workflow-test.py +0 -1
  11. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/feature/test_features.py +32 -16
  12. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/test_utils.py +69 -1
  13. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/uv.lock +608 -580
  14. deriva_ml-1.14.33/README.md +0 -12
  15. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/.github/release-drafter.yml +0 -0
  16. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/.github/workflows/publish-docs.yml +0 -0
  17. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/.github/workflows/release.yml +0 -0
  18. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/.gitignore +0 -0
  19. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/LICENSE +0 -0
  20. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/.DS_Store +0 -0
  21. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  22. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  23. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  24. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  25. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  26. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  27. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/assets/ERD.png +0 -0
  28. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/assets/Launcher.png +0 -0
  29. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/assets/copy_minid.png +0 -0
  30. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/assets/deriva-logo.png +0 -0
  31. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/assets/deriva-ml.pdf +0 -0
  32. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/assets/sharing-at-home.pdf +0 -0
  33. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/dataset.md +0 -0
  34. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/dataset_aux_classes.md +0 -0
  35. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/dataset_bag.md +0 -0
  36. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/deriva_definitions.md +0 -0
  37. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/deriva_ml_base.md +0 -0
  38. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/deriva_model.md +0 -0
  39. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/execution.md +0 -0
  40. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/execution_configuration.md +0 -0
  41. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/feature.md +0 -0
  42. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/code-docs/upload.md +0 -0
  43. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/index.md +0 -0
  44. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/release-notes.md +0 -0
  45. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/datasets.md +0 -0
  46. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/deriva_ml_structure.md +0 -0
  47. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/execution-configuration.md +0 -0
  48. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/file-assets.md +0 -0
  49. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/identifiers.md +0 -0
  50. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/install.md +0 -0
  51. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/notebooks.md +0 -0
  52. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/docs/user-guide/overview.md +0 -0
  53. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/mkdocs.yml +0 -0
  54. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/release.sh +0 -0
  55. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/setup.cfg +0 -0
  56. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/__init__.py +0 -0
  57. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/__init__.py +0 -0
  58. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/base.py +0 -0
  59. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/constants.py +0 -0
  60. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/definitions.py +0 -0
  61. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/enums.py +0 -0
  62. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/ermrest.py +0 -0
  63. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/exceptions.py +0 -0
  64. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/core/filespec.py +0 -0
  65. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/dataset/__init__.py +0 -0
  66. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/dataset/aux_classes.py +0 -0
  67. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/dataset/dataset.py +0 -0
  68. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/dataset/dataset_bag.py +0 -0
  69. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/dataset/history.py +0 -0
  70. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/dataset/upload.py +0 -0
  71. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/demo_catalog.py +0 -0
  72. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/execution/__init__.py +0 -0
  73. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/execution/environment.py +0 -0
  74. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/execution/execution.py +0 -0
  75. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/execution/execution_configuration.py +0 -0
  76. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/execution/workflow.py +0 -0
  77. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/model/__init__.py +0 -0
  78. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/model/catalog.py +0 -0
  79. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/model/database.py +0 -0
  80. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/model/sql_mapper.py +0 -0
  81. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/__init__.py +0 -0
  82. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/annotations.py +0 -0
  83. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/check_schema.py +0 -0
  84. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/create_schema.py +0 -0
  85. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  86. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/policy.json +0 -0
  87. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  88. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  89. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  90. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  91. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/requires.txt +0 -0
  92. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/src/deriva_ml.egg-info/top_level.txt +0 -0
  93. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/__init__.py +0 -0
  94. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/core/__init__.py +0 -0
  95. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/core/test_basic_tables.py +0 -0
  96. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/core/test_file.py +0 -0
  97. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/core/test_vocabulary.py +0 -0
  98. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/__init__.py +0 -0
  99. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/demo-catalog-schema.json +0 -0
  100. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/deriva-ml-reference.json +0 -0
  101. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  102. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/test_dataset_export.py +0 -0
  103. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/test_dataset_version.py +0 -0
  104. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/test_datasets.py +0 -0
  105. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/dataset/test_download.py +0 -0
  106. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/execution/__init__.py +0 -0
  107. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/model/__init__.py +0 -0
  108. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/model/test_database.py +0 -0
  109. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/model/test_models.py +0 -0
  110. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/test-files/execution-parameters.json +0 -0
  111. {deriva_ml-1.14.33 → deriva_ml-1.14.34}/tests/test-files/notebook-parameters.json +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.33
3
+ Version: 1.14.34
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
25
25
  using a deriva catalog.
26
26
 
27
27
 
28
- ## Installing the GitHub CLI
28
+ Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
29
29
 
30
- The script release.sh will create a new release tag in GitHub. This script requires the
31
- GitHUB CLI be installed.
30
+ To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template) and modify it to suit your requirements.
32
31
 
33
- See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
34
32
 
33
+ ## References
@@ -0,0 +1,11 @@
1
+ # DerivaML
2
+ Deriva-ML is a python library to simplify the process of creating and executing reproducible machine learning workflows
3
+ using a deriva catalog.
4
+
5
+
6
+ Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
7
+
8
+ To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template) and modify it to suit your requirements.
9
+
10
+
11
+ ## References
@@ -64,6 +64,9 @@ setup_hooks = []
64
64
  pre_commit_hooks = []
65
65
  post_commit_hooks = []
66
66
 
67
+ [tool.pytest]
68
+ mock_use_standalone_module = true
69
+
67
70
  [tool.pytest.ini_options]
68
71
  testpaths = ["tests"]
69
72
  python_files = ["test_*.py"]
@@ -97,6 +100,7 @@ dev = [
97
100
  "mkdocstrings[python]",
98
101
  "mkdocs-material",
99
102
  "pytest>=8.4.1",
103
+ "pytest-mock",
100
104
  "pytest-coverage>=0.0",
101
105
  "ruff"
102
106
  ]
@@ -52,6 +52,7 @@ class FeatureRecord(BaseModel):
52
52
 
53
53
  class Config:
54
54
  arbitrary_types_allowed = True
55
+ extra = "forbid"
55
56
 
56
57
  @classmethod
57
58
  def feature_columns(cls) -> set[Column]:
@@ -95,7 +95,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
95
95
  exit(1)
96
96
 
97
97
  os.environ["DERIVA_HOST"] = args.host
98
- os.environ["DERIVA_CATALOG_ID"] = args.catalog
98
+ os.environ["DERIVA_CATALOG"] = args.catalog
99
99
 
100
100
  # Create a workflow instance for this specific version of the script.
101
101
  # Return an existing workflow if one is found.
@@ -106,14 +106,14 @@ class DerivaMLRunNotebookCLI(BaseCLI):
106
106
  return
107
107
  else:
108
108
  notebook_parameters = (
109
- {"host": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
110
- | {k: v["default"] for k, v in notebook_parameters.items()}
109
+ {k: v["default"] for k, v in notebook_parameters.items()}
110
+ | {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
111
111
  | parameters
112
112
  )
113
113
  print(f"Running notebook {notebook_file.name} with parameters:")
114
114
  for param, value in notebook_parameters.items():
115
115
  print(f" {param}:{value}")
116
- self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
116
+ self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
117
117
 
118
118
  def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
119
119
  url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.33
3
+ Version: 1.14.34
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -25,10 +25,9 @@ Deriva-ML is a python library to simplify the process of creating and executing
25
25
  using a deriva catalog.
26
26
 
27
27
 
28
- ## Installing the GitHub CLI
28
+ Complete on-line documentation for DerivaML can be found [here](https://informatics-isi-edu.github.io/deriva-ml/)
29
29
 
30
- The script release.sh will create a new release tag in GitHub. This script requires the
31
- GitHUB CLI be installed.
30
+ To get started using DerivaML, you can clone the [model template repository](https://github.com/informatics-isi-edu/deriva-ml-model-template) and modify it to suit your requirements.
32
31
 
33
- See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
34
32
 
33
+ ## References
@@ -5,7 +5,7 @@ Pytest configuration and shared fixtures.
5
5
  import os
6
6
 
7
7
  import pytest
8
- from test_utils import MLCatalog, MLDatasetCatalog
8
+ from test_utils import MLCatalog, MLDatasetCatalog, create_jupyter_kernel, destroy_jupyter_kernel
9
9
 
10
10
  from deriva_ml import DerivaML
11
11
  from deriva_ml.demo_catalog import (
@@ -61,6 +61,16 @@ def dataset_test(catalog_with_datasets):
61
61
  return catalog_with_datasets
62
62
 
63
63
 
64
+ @pytest.fixture(scope="function")
65
+ def notebook_test(deriva_catalog, tmp_path):
66
+ deriva_catalog.reset_demo_catalog()
67
+ create_jupyter_kernel("test_kernel", tmp_path)
68
+ yield DerivaML(deriva_catalog.hostname, deriva_catalog.catalog_id, use_minid=False, working_dir=tmp_path)
69
+ print("Resetting catalog... ", end="")
70
+ deriva_catalog.reset_demo_catalog()
71
+ destroy_jupyter_kernel("test_kernel")
72
+
73
+
64
74
  @pytest.fixture(scope="function")
65
75
  def test_ml_demo_catalog(ml_catalog, tmp_path):
66
76
  # reset_demo_catalog(ml_catalog.catalog)
@@ -3,6 +3,7 @@ Tests for the execution module.
3
3
  """
4
4
 
5
5
  import subprocess
6
+ from pathlib import Path
6
7
  from tempfile import TemporaryDirectory
7
8
 
8
9
  from deriva_ml import (
@@ -23,20 +24,23 @@ class TestWorkflow:
23
24
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
24
25
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
25
26
  print("Running workflow-test.py ...")
27
+ workflow_script = Path(__file__).parent / "workflow-test.py"
28
+
26
29
  workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
27
30
  workflows = list(workflow_table.entities().fetch())
28
31
  assert 0 == len(workflows)
29
32
  result = subprocess.run(
30
33
  [
31
34
  "python",
32
- "execution/workflow-test.py",
35
+ workflow_script.as_posix(),
33
36
  ml_instance.catalog.deriva_server.server,
34
37
  ml_instance.catalog_id,
35
38
  ],
36
39
  capture_output=True,
37
40
  text=True,
38
41
  )
39
-
42
+ print(result.stdout)
43
+ print(result.stderr)
40
44
  workflows = list(workflow_table.entities().fetch())
41
45
  assert 1 == len(workflows)
42
46
  workflow_rid = workflows[0]["RID"]
@@ -50,18 +54,22 @@ class TestWorkflow:
50
54
  result = subprocess.run(
51
55
  [
52
56
  "python",
53
- "execution/workflow-test.py",
57
+ workflow_script.as_posix(),
54
58
  ml_instance.catalog.deriva_server.server,
55
59
  ml_instance.catalog_id,
56
60
  ],
57
61
  capture_output=True,
58
62
  text=True,
59
63
  )
64
+ print(result.stdout)
65
+ print(result.stderr)
60
66
  new_workflow = result.stdout.strip()
61
67
  assert new_workflow == workflow_rid
62
68
 
63
- def test_workflow_creation_notebook(self, test_ml):
64
- ml_instance = test_ml
69
+ def test_workflow_creation_notebook(self, notebook_test):
70
+ ml_instance = notebook_test
71
+
72
+ notebook_path = Path(__file__).parent / "workflow-test.ipynb" # directory where this test lives
65
73
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
66
74
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
67
75
  workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
@@ -69,14 +77,17 @@ class TestWorkflow:
69
77
  assert 0 == len(workflows)
70
78
 
71
79
  print("Running notebook...")
80
+
72
81
  result = subprocess.run(
73
82
  [
74
83
  "deriva-ml-run-notebook",
75
- "execution/workflow-test.ipynb",
84
+ notebook_path.as_posix(),
76
85
  "--host",
77
86
  ml_instance.catalog.deriva_server.server,
78
87
  "--catalog",
79
88
  ml_instance.catalog_id,
89
+ "--kernel",
90
+ "test_kernel",
80
91
  "--log-output",
81
92
  ],
82
93
  capture_output=True,
@@ -18,9 +18,7 @@
18
18
  "outputs": [],
19
19
  "source": [
20
20
  "from deriva_ml import DerivaML, MLVocab as vc\n",
21
- "import os\n",
22
- "import logging\n",
23
- "logger = logging.getLogger()"
21
+ "import os"
24
22
  ]
25
23
  },
26
24
  {
@@ -42,8 +40,8 @@
42
40
  },
43
41
  "outputs": [],
44
42
  "source": [
45
- "hostname = None\n",
46
- "catalog_id = None"
43
+ "host = None\n",
44
+ "catalog = None"
47
45
  ]
48
46
  },
49
47
  {
@@ -54,24 +52,21 @@
54
52
  "outputs": [],
55
53
  "source": [
56
54
  "# Modify these to your desired server and catalog.\n",
57
- "hostname = hostname or os.environ.get(\"DERIVA_HOST\")\n",
58
- "catalog_id = catalog_id or os.environ.get(\"DERIVA_CATALOG_ID\")\n",
55
+ "host = host or os.environ.get(\"DERIVA_HOST\")\n",
56
+ "catalog = catalog or os.environ.get(\"DERIVA_CATALOG\")\n",
59
57
  "\n",
60
58
  "# Change this line to call the domain specific class derived from DerivaML\n",
61
- "ml_instance = DerivaML(hostname, catalog_id)\n",
62
- "logger.info(\"Got ML instance:\")\n",
59
+ "ml_instance = DerivaML(host, catalog)\n",
63
60
  "\n",
64
61
  "ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
65
62
  "ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
66
- "print(\"Added terms to ML instance\")\n",
63
+ "\n",
67
64
  "api_workflow = ml_instance.create_workflow(\n",
68
- " name=\"Test Workflow One\",\n",
69
- " workflow_type=\"Test Workflow\",\n",
70
- " description=\"A test operation\",\n",
71
- ")\n",
72
- "logger.info(f\"URL: {api_workflow.url}\")\n",
73
- "rid = ml_instance.add_workflow(api_workflow)\n",
74
- "logger.info(f\"RID {rid}\")\n"
65
+ " name=\"Test Workflow One\",\n",
66
+ " workflow_type=\"Test Workflow\",\n",
67
+ " description=\"A test operation\",\n",
68
+ " )\n",
69
+ "rid = ml_instance.add_workflow(api_workflow)"
75
70
  ]
76
71
  }
77
72
  ],
@@ -6,7 +6,6 @@ from deriva_ml import MLVocab as vc
6
6
  hostname = sys.argv[1]
7
7
  catalog_id = sys.argv[2]
8
8
 
9
-
10
9
  ml_instance = DerivaML(hostname, catalog_id)
11
10
 
12
11
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
@@ -2,9 +2,8 @@
2
2
  Tests for feature functionality.
3
3
  """
4
4
 
5
- from unittest.mock import Mock
6
-
7
5
  import pytest
6
+ from pydantic import ValidationError
8
7
 
9
8
  from deriva_ml import (
10
9
  BuiltinTypes,
@@ -21,14 +20,14 @@ from deriva_ml.feature import FeatureRecord
21
20
  class TestFeatureRecord:
22
21
  """Test cases for the FeatureRecord base class."""
23
22
 
24
- def test_feature_record_creation(self):
23
+ def test_feature_record_creation(self, mocker):
25
24
  """Test basic FeatureRecord creation."""
26
25
  # Create a mock feature
27
- mock_feature = Mock()
28
- mock_feature.feature_columns = {Mock(name="value"), Mock(name="confidence")}
29
- mock_feature.asset_columns = {Mock(name="image_file")}
30
- mock_feature.term_columns = {Mock(name="category")}
31
- mock_feature.value_columns = {Mock(name="score")}
26
+ mock_feature = mocker.Mock()
27
+ mock_feature.feature_columns = {mocker.Mock(name="value"), mocker.Mock(name="confidence")}
28
+ mock_feature.asset_columns = {mocker.Mock(name="image_file")}
29
+ mock_feature.term_columns = {mocker.Mock(name="category")}
30
+ mock_feature.value_columns = {mocker.Mock(name="score")}
32
31
 
33
32
  # Create a test class that inherits from FeatureRecord
34
33
  class TestFeature(FeatureRecord):
@@ -58,17 +57,17 @@ class TestFeatureRecord:
58
57
  assert record.category == "good"
59
58
  assert record.score == 0.8
60
59
 
61
- def test_feature_record_column_methods(self):
60
+ def test_feature_record_column_methods(self, mocker):
62
61
  """Test the column access methods of FeatureRecord."""
63
62
  # Create mock columns
64
- value_col = Mock(name="value")
65
- confidence_col = Mock(name="confidence")
66
- asset_col = Mock(name="image_file")
67
- term_col = Mock(name="category")
68
- value_only_col = Mock(name="score")
63
+ value_col = mocker.Mock(name="value")
64
+ confidence_col = mocker.Mock(name="confidence")
65
+ asset_col = mocker.Mock(name="image_file")
66
+ term_col = mocker.Mock(name="category")
67
+ value_only_col = mocker.Mock(name="score")
69
68
 
70
69
  # Create a mock feature
71
- mock_feature = Mock()
70
+ mock_feature = mocker.Mock()
72
71
  mock_feature.feature_columns = {value_col, confidence_col, asset_col, term_col, value_only_col}
73
72
  mock_feature.asset_columns = {asset_col}
74
73
  mock_feature.term_columns = {term_col}
@@ -141,6 +140,22 @@ class TestFeatures:
141
140
  with pytest.raises(DerivaMLException):
142
141
  ml_instance.lookup_feature("Subject", "SubjectHealth1")
143
142
 
143
+ def test_feature_record(self, dataset_test, tmp_path):
144
+ ml_instance = DerivaML(
145
+ dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
146
+ )
147
+ SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
148
+ print(SubjectHealthFeature.model_fields.keys())
149
+
150
+ print(SubjectHealthFeature.feature_columns())
151
+
152
+ with pytest.raises(ValidationError):
153
+ SubjectHealthFeature(Subject="SubjectRID", Health="Good", Scale=23, Foo="Bar")
154
+ print(SubjectHealthFeature.value_columns())
155
+ print(SubjectHealthFeature.term_columns())
156
+ print(SubjectHealthFeature.asset_columns())
157
+ print(SubjectHealthFeature.feature_columns())
158
+
144
159
  def test_add_feature(self, dataset_test, tmp_path):
145
160
  ml_instance = DerivaML(
146
161
  dataset_test.catalog.hostname, dataset_test.catalog.catalog_id, working_dir=tmp_path, use_minid=False
@@ -166,7 +181,8 @@ class TestFeatures:
166
181
 
167
182
  with feature_execution.execute() as exe:
168
183
  SubjectHealthFeature = ml_instance.feature_record_class("Subject", "Health")
169
- exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], Health="Good", Scale=23)])
184
+ print(SubjectHealthFeature.feature_columns())
185
+ exe.add_features([SubjectHealthFeature(Subject=subject_rids[0], SubjectHealth="Sick", Scale=23)])
170
186
 
171
187
  feature_execution.upload_execution_outputs()
172
188
  features = list(ml_instance.list_feature_values("Subject", "Health"))
@@ -1,8 +1,12 @@
1
+ import os
2
+ import shutil
1
3
  from tempfile import TemporaryDirectory
2
4
  from urllib.parse import quote as urlquote
3
5
 
4
6
  from demo_catalog import create_demo_features
5
7
  from deriva.core.datapath import DataPathException
8
+ from ipykernel.kernelspec import install
9
+ from jupyter_client.kernelspec import KernelSpecManager
6
10
 
7
11
  from deriva_ml import DerivaML
8
12
  from deriva_ml.demo_catalog import (
@@ -42,7 +46,14 @@ class MLCatalog:
42
46
  pb = self.catalog.getPathBuilder()
43
47
  ml_path = pb.schemas["deriva-ml"]
44
48
  domain_path = pb.schemas[self.domain_schema]
45
- for t in ["Dataset_Execution", "Dataset_Version", "Dataset_Dataset", "Workflow", "Workflow_Execution"]:
49
+ for t in [
50
+ "Dataset_Execution",
51
+ "Dataset_Version",
52
+ "Dataset_Dataset",
53
+ "Execution",
54
+ "Workflow_Execution",
55
+ "Workflow",
56
+ ]:
46
57
  try:
47
58
  ml_path.tables[t].path.delete()
48
59
  except DataPathException:
@@ -110,3 +121,60 @@ class MLDatasetCatalog:
110
121
  with TemporaryDirectory() as tmp_dir:
111
122
  ml_instance = DerivaML(self.catalog.hostname, self.catalog.catalog_id, working_dir=tmp_dir, use_minid=False)
112
123
  self.dataset_description: DatasetDescription = create_demo_datasets(ml_instance)
124
+
125
+
126
+ def create_jupyter_kernel(name: str, kernel_dir, display_name: str = None, user: bool = True) -> None:
127
+ """
128
+ Create and install a Jupyter kernel spec using ipykernel.
129
+
130
+ Parameters
131
+ ----------
132
+ name : str
133
+ The internal name of the kernel (used in `--kernel`).
134
+ display_name : str, optional
135
+ The label shown in Jupyter’s kernel chooser (defaults to name).
136
+ user : bool, default=True
137
+ If True, install for the current user only.
138
+ If False, requires admin rights (system-wide).
139
+ """
140
+ if display_name is None:
141
+ display_name = name
142
+
143
+ os.environ["JUPYTER_PATH"] = f"{kernel_dir}/share/jupyter"
144
+
145
+ print(f"Installing Jupyter kernel '{name}' with display name '{display_name}'")
146
+ install(
147
+ kernel_name=name,
148
+ display_name=display_name,
149
+ prefix=kernel_dir, # ensures it uses the current environment
150
+ )
151
+ print("✅ Kernel installed successfully.")
152
+
153
+
154
+ def destroy_jupyter_kernel(name: str, user: bool = True) -> None:
155
+ """
156
+ Remove a Jupyter kernel spec by name.
157
+
158
+ Parameters
159
+ ----------
160
+ name : str
161
+ The internal kernel name (the same name used in create_jupyter_kernel).
162
+ user : bool, default=True
163
+ If True, remove from the user-level kernels directory.
164
+ If False, attempt system-wide removal (requires permissions).
165
+ """
166
+ ksm = KernelSpecManager()
167
+ kernels = ksm.find_kernel_specs()
168
+
169
+ if name not in kernels:
170
+ print(f"❌ Kernel '{name}' not found.")
171
+ return
172
+
173
+ kernel_path = kernels[name]
174
+ print(f"Removing kernel '{name}' at {kernel_path}")
175
+
176
+ try:
177
+ shutil.rmtree(kernel_path)
178
+ print(f"✅ Kernel '{name}' removed successfully.")
179
+ except Exception as e:
180
+ print(f"⚠️ Failed to remove kernel '{name}': {e}")