deriva-ml 1.17.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. deriva_ml/.DS_Store +0 -0
  2. deriva_ml/__init__.py +79 -0
  3. deriva_ml/bump_version.py +142 -0
  4. deriva_ml/core/__init__.py +39 -0
  5. deriva_ml/core/base.py +1527 -0
  6. deriva_ml/core/config.py +69 -0
  7. deriva_ml/core/constants.py +36 -0
  8. deriva_ml/core/definitions.py +74 -0
  9. deriva_ml/core/enums.py +222 -0
  10. deriva_ml/core/ermrest.py +288 -0
  11. deriva_ml/core/exceptions.py +28 -0
  12. deriva_ml/core/filespec.py +116 -0
  13. deriva_ml/dataset/__init__.py +12 -0
  14. deriva_ml/dataset/aux_classes.py +225 -0
  15. deriva_ml/dataset/dataset.py +1519 -0
  16. deriva_ml/dataset/dataset_bag.py +450 -0
  17. deriva_ml/dataset/history.py +109 -0
  18. deriva_ml/dataset/upload.py +439 -0
  19. deriva_ml/demo_catalog.py +495 -0
  20. deriva_ml/execution/__init__.py +26 -0
  21. deriva_ml/execution/environment.py +290 -0
  22. deriva_ml/execution/execution.py +1180 -0
  23. deriva_ml/execution/execution_configuration.py +147 -0
  24. deriva_ml/execution/workflow.py +413 -0
  25. deriva_ml/feature.py +228 -0
  26. deriva_ml/install_kernel.py +71 -0
  27. deriva_ml/model/__init__.py +0 -0
  28. deriva_ml/model/catalog.py +485 -0
  29. deriva_ml/model/database.py +719 -0
  30. deriva_ml/protocols/dataset.py +19 -0
  31. deriva_ml/run_notebook.py +228 -0
  32. deriva_ml/schema/__init__.py +3 -0
  33. deriva_ml/schema/annotations.py +473 -0
  34. deriva_ml/schema/check_schema.py +104 -0
  35. deriva_ml/schema/create_schema.py +393 -0
  36. deriva_ml/schema/deriva-ml-reference.json +8525 -0
  37. deriva_ml/schema/policy.json +81 -0
  38. deriva_ml/schema/table_comments_utils.py +57 -0
  39. deriva_ml/test.py +94 -0
  40. deriva_ml-1.17.10.dist-info/METADATA +38 -0
  41. deriva_ml-1.17.10.dist-info/RECORD +45 -0
  42. deriva_ml-1.17.10.dist-info/WHEEL +5 -0
  43. deriva_ml-1.17.10.dist-info/entry_points.txt +9 -0
  44. deriva_ml-1.17.10.dist-info/licenses/LICENSE +201 -0
  45. deriva_ml-1.17.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,19 @@
1
+ """A module defining the DatasetLike protocol for dataset operations.
2
+
3
+ This module contains the definition of the DatasetLike protocol, which
4
+ provides an interface for datasets to implement specific functionality related
5
+ to listing dataset children. It is particularly useful for ensuring type
6
+ compatibility for objects that mimic datasets in their behavior.
7
+
8
+ Classes:
9
+ DatasetLike: A protocol that specifies methods required for dataset-like
10
+ objects.
11
+ """
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from deriva_ml.core.definitions import RID
15
+
16
+
17
@runtime_checkable
class DatasetLike(Protocol):
    """Structural type for objects that can list the children of a dataset.

    The protocol is runtime-checkable, so ``isinstance(obj, DatasetLike)``
    succeeds for any object that exposes a ``list_dataset_children`` attribute.
    """

    def list_dataset_children(
        self,
        dataset_rid: RID,
        recurse: bool = False,
    ) -> list[RID]:
        """Return the RIDs of the children of ``dataset_rid``.

        Args:
            dataset_rid: RID of the dataset whose children are requested.
            recurse: Defaults to ``False``; presumably requests nested
                descendants as well when true (confirm against implementers).

        Returns:
            A list of RIDs.
        """
@@ -0,0 +1,228 @@
1
+ """Module to run a notebook using papermill"""
2
+
3
+ import json
4
+ import os
5
+ import tempfile
6
+ from pathlib import Path
7
+
8
+ import nbformat
9
+ import papermill as pm
10
+ import yaml
11
+ from deriva.core import BaseCLI
12
+ from jupyter_client.kernelspec import KernelSpecManager
13
+ from nbconvert import MarkdownExporter
14
+
15
+ from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
16
+
17
+
18
class DerivaMLRunNotebookCLI(BaseCLI):
    """Command-line interface that parses arguments and runs a notebook with papermill."""

    def __init__(self, description, epilog, **kwargs):
        """Register the command-line arguments understood by the notebook runner.

        Args:
            description: Program description forwarded to ``BaseCLI``.
            epilog: Help epilog forwarded to ``BaseCLI``.
            **kwargs: Additional keyword arguments forwarded to ``BaseCLI``.
        """
        BaseCLI.__init__(self, description, epilog, **kwargs)
        Workflow._check_nbstrip_status()
        self.parser.add_argument("notebook_file", type=Path, help="Path to the notebook file")

        self.parser.add_argument(
            "--file",
            "-f",
            type=Path,
            default=None,
            help="JSON or YAML file with parameter values to inject into the notebook.",
        )

        self.parser.add_argument(
            "--inspect",
            action="store_true",
            help="Display parameters information for the given notebook path.",
        )

        self.parser.add_argument(
            "--log-output",
            action="store_true",
            help="Display logging output from notebook.",
        )

        self.parser.add_argument(
            "--parameter",
            "-p",
            nargs=2,
            action="append",
            metavar=("KEY", "VALUE"),
            default=[],
            help="Provide a parameter name and value to inject into the notebook.",
        )

        # Default to the kernel registered for the active virtual environment
        # (None when no such kernel can be found).
        self.parser.add_argument(
            "--kernel",
            "-k",
            type=str,
            help="Name of kernel to run..",
            default=self._find_kernel_for_venv(),
        )

    @staticmethod
    def _coerce_number(val: str):
        """Convert ``val`` to ``int`` if possible, else ``float``, else return it unchanged."""
        for convert in (int, float):
            try:
                return convert(val)
            except ValueError:
                continue
        return val

    @staticmethod
    def _load_parameter_file(parameter_file: Path) -> dict:
        """Load notebook parameters from a JSON or YAML file.

        Args:
            parameter_file: Path whose suffix is ``.json``, ``.yaml`` or ``.yml``
                (compared case-insensitively).

        Returns:
            The mapping stored in the file; an empty dict when the file holds
            no document (empty YAML file or JSON ``null``).

        Raises:
            SystemExit: With status 1 when the suffix is not supported or the
                file content is not a mapping.
        """
        suffix = parameter_file.suffix.lower()
        if suffix not in (".json", ".yaml", ".yml"):
            print("Parameter file must be an json or YAML file.")
            raise SystemExit(1)

        with parameter_file.open("r") as f:
            loaded = json.load(f) if suffix == ".json" else yaml.safe_load(f)

        # An empty YAML document loads as None; merging None into a dict
        # would raise TypeError, so treat it as "no parameters".
        if loaded is None:
            return {}
        if not isinstance(loaded, dict):
            print("Parameter file must contain a mapping of parameter names to values.")
            raise SystemExit(1)
        return loaded

    def main(self):
        """Parse arguments, then either inspect the notebook or execute it.

        Parameters given with ``--parameter`` are collected first and values
        from ``--file`` are merged on top of them, so the file wins when both
        define the same key.

        Raises:
            SystemExit: With status 1 when the parameter file or the notebook
                file is not acceptable.
        """
        args = self.parse_cli()
        notebook_file: Path = args.notebook_file
        parameter_file = args.file

        # args.parameter is a list of [KEY, VALUE] lists,
        # e.g. [['timeout', '30'], ['name', 'Alice'], ...]
        parameters = {key: self._coerce_number(val) for key, val in args.parameter}

        if parameter_file:
            parameters |= self._load_parameter_file(parameter_file)

        if not (notebook_file.is_file() and notebook_file.suffix == ".ipynb"):
            print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
            raise SystemExit(1)

        if args.inspect:
            for param, value in pm.inspect_notebook(notebook_file).items():
                print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
            return

        # Only the explicitly supplied values are injected; parameters that are
        # not overridden keep the defaults defined in the notebook itself.
        self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)

    @staticmethod
    def _find_kernel_for_venv() -> str | None:
        """Return the name of the Jupyter kernel that runs the active virtual environment.

        The virtual environment is taken from the ``VIRTUAL_ENV`` environment
        variable. A kernel matches when one of its ``argv`` entries resolves to
        the environment's interpreter (``bin/python`` or, for the Windows
        layout, ``Scripts/python.exe``). Both sides are resolved before
        comparison, so symbolic links are followed.

        Returns:
            The kernel name, or None when ``VIRTUAL_ENV`` is unset or no
            installed kernel points at the environment's interpreter.
        """
        venv = os.environ.get("VIRTUAL_ENV")
        if not venv:
            return None

        venv_path = Path(venv).resolve()
        try:
            # Resolve the candidate interpreters once instead of per argv item.
            interpreters = {
                venv_path.joinpath("bin", "python").resolve(),
                venv_path.joinpath("Scripts", "python.exe").resolve(),
            }
        except (OSError, RuntimeError):
            return None

        for name, spec in KernelSpecManager().get_all_specs().items():
            for arg in spec.get("spec", {}).get("argv", []):
                try:
                    if Path(arg).resolve() in interpreters:
                        return name
                except (OSError, RuntimeError, TypeError, ValueError):
                    # Best effort: an argv entry that cannot be treated as a
                    # path simply is not the interpreter.
                    continue
        return None

    def run_notebook(self, notebook_file: Path, parameters, kernel=None, log=False):
        """Execute the notebook and upload its output as execution assets.

        The workflow URL, checksum, notebook path and the location where the
        notebook should save its execution record are passed to the notebook
        through ``DERIVA_ML_*`` environment variables. After papermill
        finishes, the execution record is read back, the executed notebook is
        converted to Markdown, and both files are uploaded.

        Args:
            notebook_file: Path of the notebook to execute.
            parameters: Mapping of parameter names to values injected by papermill.
            kernel: Name of the Jupyter kernel passed to papermill.
            log: Passed to papermill as ``log_output``.

        Raises:
            SystemExit: With status 1 when the notebook did not produce an
                execution record.
        """
        notebook_file = Path(notebook_file)
        url, checksum = Workflow.get_url_and_checksum(notebook_file)
        os.environ["DERIVA_ML_WORKFLOW_URL"] = url
        os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
        os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()

        with tempfile.TemporaryDirectory() as tmpdirname:
            workdir = Path(tmpdirname)
            notebook_output = workdir / notebook_file.name
            execution_rid_path = workdir / "execution_rid.json"
            os.environ["DERIVA_ML_SAVE_EXECUTION_RID"] = execution_rid_path.as_posix()

            pm.execute_notebook(
                input_path=notebook_file,
                output_path=notebook_output,
                parameters=parameters,
                kernel_name=kernel,
                log_output=log,
            )
            print(f"Notebook output saved to {notebook_output}")

            # A notebook that never saved its execution record leaves no file
            # behind; report that the same way as an empty record.
            execution_config = None
            if execution_rid_path.is_file():
                with execution_rid_path.open("r") as f:
                    execution_config = json.load(f)
            if not execution_config:
                print("Execution RID not found.")
                raise SystemExit(1)

            execution_rid = execution_config["execution_rid"]
            ml_instance = DerivaML(
                hostname=execution_config["hostname"],
                catalog_id=execution_config["catalog_id"],
                working_dir=tmpdirname,
            )
            # The workflow is looked up from the execution record in the catalog.
            workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]

            execution = Execution(
                configuration=ExecutionConfiguration(workflow=workflow_rid),
                ml_object=ml_instance,
                reload=execution_rid,
            )

            # Generate a Markdown version of the output notebook.
            notebook_output_md = notebook_output.with_suffix(".md")
            nb = nbformat.read(notebook_output, as_version=4)
            body, _resources = MarkdownExporter().from_notebook_node(nb)
            notebook_output_md.write_text(body, encoding="utf-8")

            execution.asset_file_path(
                asset_name=MLAsset.execution_asset,
                file_name=notebook_output,
                asset_types=ExecAssetType.notebook_output,
            )

            execution.asset_file_path(
                asset_name=MLAsset.execution_asset,
                file_name=notebook_output_md,
                asset_types=ExecAssetType.notebook_output,
            )
            execution.upload_execution_outputs()

            print(ml_instance.cite(execution_rid))
209
+
210
+
211
def main():
    """Run the notebook runner command-line interface.

    Builds a DerivaMLRunNotebookCLI and invokes its ``main`` method.

    Returns:
        None. Executes the CLI.
    """
    runner = DerivaMLRunNotebookCLI(
        description="Deriva ML Execution Script Demo",
        epilog="",
    )
    runner.main()
221
+
222
+
223
if __name__ == "__main__":
    import sys

    try:
        main()
    except Exception as e:
        # Top-level boundary: report the failure on stderr so it does not mix
        # with normal output, and signal it through the exit status.
        print(e, file=sys.stderr)
        sys.exit(1)
@@ -0,0 +1,3 @@
1
+ from deriva_ml.schema.create_schema import create_ml_catalog, reset_ml_schema
2
+
3
+ __all__ = ["create_ml_catalog", "reset_ml_schema"]