deriva-ml 1.14.28__py3-none-any.whl → 1.14.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/core/base.py +14 -7
- deriva_ml/execution/workflow.py +29 -40
- deriva_ml/run_notebook.py +12 -19
- {deriva_ml-1.14.28.dist-info → deriva_ml-1.14.30.dist-info}/METADATA +1 -1
- {deriva_ml-1.14.28.dist-info → deriva_ml-1.14.30.dist-info}/RECORD +9 -9
- {deriva_ml-1.14.28.dist-info → deriva_ml-1.14.30.dist-info}/WHEEL +0 -0
- {deriva_ml-1.14.28.dist-info → deriva_ml-1.14.30.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.14.28.dist-info → deriva_ml-1.14.30.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.14.28.dist-info → deriva_ml-1.14.30.dist-info}/top_level.txt +0 -0
deriva_ml/core/base.py
CHANGED
|
@@ -1305,7 +1305,9 @@ class DerivaML(Dataset):
|
|
|
1305
1305
|
def add_workflow(self, workflow: Workflow) -> RID:
|
|
1306
1306
|
"""Adds a workflow to the catalog.
|
|
1307
1307
|
|
|
1308
|
-
Registers a new workflow in the catalog or returns the RID of an existing workflow with the same
|
|
1308
|
+
Registers a new workflow in the catalog or returns the RID of an existing workflow with the same
|
|
1309
|
+
URL or checksum.
|
|
1310
|
+
|
|
1309
1311
|
Each workflow represents a specific computational process or analysis pipeline.
|
|
1310
1312
|
|
|
1311
1313
|
Args:
|
|
@@ -1328,11 +1330,12 @@ class DerivaML(Dataset):
|
|
|
1328
1330
|
>>> workflow_rid = ml.add_workflow(workflow)
|
|
1329
1331
|
"""
|
|
1330
1332
|
# Check if a workflow already exists by URL
|
|
1331
|
-
if workflow_rid := self.lookup_workflow(workflow.url):
|
|
1333
|
+
if workflow_rid := self.lookup_workflow(workflow.checksum or workflow.url):
|
|
1332
1334
|
return workflow_rid
|
|
1333
1335
|
|
|
1334
1336
|
# Get an ML schema path for the workflow table
|
|
1335
1337
|
ml_schema_path = self.pathBuilder.schemas[self.ml_schema]
|
|
1338
|
+
|
|
1336
1339
|
try:
|
|
1337
1340
|
# Create a workflow record
|
|
1338
1341
|
workflow_record = {
|
|
@@ -1350,12 +1353,11 @@ class DerivaML(Dataset):
|
|
|
1350
1353
|
raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
|
|
1351
1354
|
return workflow_rid
|
|
1352
1355
|
|
|
1353
|
-
def lookup_workflow(self,
|
|
1356
|
+
def lookup_workflow(self, url_or_checksum: str) -> RID | None:
|
|
1354
1357
|
"""Finds a workflow by URL.
|
|
1355
1358
|
|
|
1356
1359
|
Args:
|
|
1357
|
-
|
|
1358
|
-
|
|
1360
|
+
url_or_checksum: URL or checksum of the workflow.
|
|
1359
1361
|
Returns:
|
|
1360
1362
|
RID: Resource Identifier of the workflow if found, None otherwise.
|
|
1361
1363
|
|
|
@@ -1369,7 +1371,12 @@ class DerivaML(Dataset):
|
|
|
1369
1371
|
try:
|
|
1370
1372
|
# Search for workflow by URL
|
|
1371
1373
|
url_column = workflow_path.URL
|
|
1372
|
-
|
|
1374
|
+
checksum_column = workflow_path.Checksum
|
|
1375
|
+
return list(
|
|
1376
|
+
workflow_path.path.filter(
|
|
1377
|
+
(url_column == url_or_checksum) | (checksum_column == url_or_checksum)
|
|
1378
|
+
).entities()
|
|
1379
|
+
)[0]["RID"]
|
|
1373
1380
|
except IndexError:
|
|
1374
1381
|
return None
|
|
1375
1382
|
|
|
@@ -1403,7 +1410,7 @@ class DerivaML(Dataset):
|
|
|
1403
1410
|
self.lookup_term(MLVocab.workflow_type, workflow_type)
|
|
1404
1411
|
|
|
1405
1412
|
# Create and return a new workflow object
|
|
1406
|
-
return Workflow
|
|
1413
|
+
return Workflow(name=name, workflow_type=workflow_type, description=description)
|
|
1407
1414
|
|
|
1408
1415
|
def create_execution(self, configuration: ExecutionConfiguration, dry_run: bool = False) -> "Execution":
|
|
1409
1416
|
"""Creates an execution environment.
|
deriva_ml/execution/workflow.py
CHANGED
|
@@ -7,10 +7,6 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
9
|
import requests
|
|
10
|
-
from pydantic import (
|
|
11
|
-
BaseModel,
|
|
12
|
-
PrivateAttr,
|
|
13
|
-
)
|
|
14
10
|
from requests import RequestException
|
|
15
11
|
|
|
16
12
|
from deriva_ml.core.definitions import RID
|
|
@@ -52,7 +48,7 @@ except ImportError:
|
|
|
52
48
|
return get_connection_file()
|
|
53
49
|
|
|
54
50
|
|
|
55
|
-
class Workflow
|
|
51
|
+
class Workflow:
|
|
56
52
|
"""Represents a computational workflow in DerivaML.
|
|
57
53
|
|
|
58
54
|
A workflow defines a computational process or analysis pipeline. Each workflow has
|
|
@@ -79,27 +75,17 @@ class Workflow(BaseModel):
|
|
|
79
75
|
... )
|
|
80
76
|
"""
|
|
81
77
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
workflow_type: str
|
|
85
|
-
version: str | None = None
|
|
86
|
-
description: str | None = None
|
|
87
|
-
rid: RID | None = None
|
|
88
|
-
checksum: str | None = None
|
|
89
|
-
is_notebook: bool = False
|
|
90
|
-
|
|
91
|
-
_logger: Any = PrivateAttr()
|
|
92
|
-
|
|
93
|
-
def __post_init__(self):
|
|
94
|
-
"""Initializes logging for the workflow."""
|
|
95
|
-
self._logger = logging.getLogger("deriva_ml")
|
|
96
|
-
|
|
97
|
-
@staticmethod
|
|
98
|
-
def create_workflow(
|
|
78
|
+
def __init__(
|
|
79
|
+
self,
|
|
99
80
|
name: str,
|
|
100
81
|
workflow_type: str,
|
|
101
|
-
description: str =
|
|
102
|
-
|
|
82
|
+
description: str | None = None,
|
|
83
|
+
url: str | None = None,
|
|
84
|
+
version: str | None = None,
|
|
85
|
+
rid: RID | None = None,
|
|
86
|
+
checksum: str | None = None,
|
|
87
|
+
is_notebook: bool = False,
|
|
88
|
+
):
|
|
103
89
|
"""Creates a workflow from the current execution context.
|
|
104
90
|
|
|
105
91
|
Identifies the currently executing program (script or notebook) and creates
|
|
@@ -128,24 +114,27 @@ class Workflow(BaseModel):
|
|
|
128
114
|
... description="Process sample data"
|
|
129
115
|
... )
|
|
130
116
|
"""
|
|
117
|
+
self.name = name
|
|
118
|
+
self.url = url
|
|
119
|
+
self.workflow_type = workflow_type
|
|
120
|
+
self.version = version
|
|
121
|
+
self.description = description
|
|
122
|
+
self.rid = rid
|
|
123
|
+
self.checksum = checksum
|
|
124
|
+
self.is_notebook = is_notebook
|
|
125
|
+
"""Initializes logging for the workflow."""
|
|
131
126
|
|
|
132
127
|
# Check to see if execution file info is being passed in by calling program.
|
|
133
128
|
if "DERIVA_ML_WORKFLOW_URL" in os.environ:
|
|
134
|
-
|
|
135
|
-
checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
|
|
136
|
-
is_notebook = True
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
url=github_url,
|
|
144
|
-
checksum=checksum,
|
|
145
|
-
description=description,
|
|
146
|
-
workflow_type=workflow_type,
|
|
147
|
-
is_notebook=is_notebook,
|
|
148
|
-
)
|
|
129
|
+
self.url = os.environ["DERIVA_ML_WORKFLOW_URL"]
|
|
130
|
+
self.checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
|
|
131
|
+
self.is_notebook = True
|
|
132
|
+
|
|
133
|
+
if not self.url:
|
|
134
|
+
path, self.is_notebook = Workflow._get_python_script()
|
|
135
|
+
self.url, self.checksum = Workflow.get_url_and_checksum(path)
|
|
136
|
+
|
|
137
|
+
self._logger = logging.getLogger("deriva_ml")
|
|
149
138
|
|
|
150
139
|
@staticmethod
|
|
151
140
|
def get_url_and_checksum(executable_path: Path) -> tuple[str, str]:
|
|
@@ -386,7 +375,7 @@ class Workflow(BaseModel):
|
|
|
386
375
|
"""Get SHA-1 hash of latest commit of the file in the repository"""
|
|
387
376
|
|
|
388
377
|
result = subprocess.run(
|
|
389
|
-
["git", "log", "-n", "1", "--pretty=format:%H
|
|
378
|
+
["git", "log", "-n", "1", "--pretty=format:%H", executable_path],
|
|
390
379
|
cwd=repo_root,
|
|
391
380
|
capture_output=True,
|
|
392
381
|
text=True,
|
deriva_ml/run_notebook.py
CHANGED
|
@@ -19,9 +19,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
19
19
|
def __init__(self, description, epilog, **kwargs):
|
|
20
20
|
BaseCLI.__init__(self, description, epilog, **kwargs)
|
|
21
21
|
Workflow._check_nbstrip_status()
|
|
22
|
-
self.parser.add_argument(
|
|
23
|
-
"notebook_file", type=Path, help="Path to the notebook file"
|
|
24
|
-
)
|
|
22
|
+
self.parser.add_argument("notebook_file", type=Path, help="Path to the notebook file")
|
|
25
23
|
|
|
26
24
|
self.parser.add_argument(
|
|
27
25
|
"--file",
|
|
@@ -39,7 +37,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
39
37
|
|
|
40
38
|
self.parser.add_argument(
|
|
41
39
|
"--log-output",
|
|
42
|
-
action="
|
|
40
|
+
action="store_true",
|
|
43
41
|
help="Display logging output from notebook.",
|
|
44
42
|
)
|
|
45
43
|
|
|
@@ -60,9 +58,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
60
58
|
help="Provide a parameter name and value to inject into the notebook.",
|
|
61
59
|
)
|
|
62
60
|
|
|
63
|
-
self.parser.add_argument(
|
|
64
|
-
"--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None
|
|
65
|
-
)
|
|
61
|
+
self.parser.add_argument("--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None)
|
|
66
62
|
|
|
67
63
|
@staticmethod
|
|
68
64
|
def _coerce_number(val: str):
|
|
@@ -95,7 +91,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
95
91
|
parameters |= json.load(f)
|
|
96
92
|
|
|
97
93
|
if not (notebook_file.is_file() and notebook_file.suffix == ".ipynb"):
|
|
98
|
-
print("Notebook file must be an ipynb file.")
|
|
94
|
+
print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
|
|
99
95
|
exit(1)
|
|
100
96
|
|
|
101
97
|
os.environ["DERIVA_HOST"] = args.host
|
|
@@ -106,22 +102,20 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
106
102
|
notebook_parameters = pm.inspect_notebook(notebook_file)
|
|
107
103
|
if args.inspect:
|
|
108
104
|
for param, value in notebook_parameters.items():
|
|
109
|
-
print(
|
|
110
|
-
f"{param}:{value['inferred_type_name']} (default {value['default']})"
|
|
111
|
-
)
|
|
105
|
+
print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
|
|
112
106
|
return
|
|
113
107
|
else:
|
|
114
108
|
notebook_parameters = (
|
|
115
|
-
{"host": args.host, "catalog": args.catalog}
|
|
109
|
+
{"host": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
|
|
116
110
|
| {k: v["default"] for k, v in notebook_parameters.items()}
|
|
117
111
|
| parameters
|
|
118
112
|
)
|
|
119
113
|
print(f"Running notebook {notebook_file.name} with parameters:")
|
|
120
114
|
for param, value in notebook_parameters.items():
|
|
121
115
|
print(f" {param}:{value}")
|
|
122
|
-
self.run_notebook(notebook_file.resolve(), parameters, args.kernel)
|
|
116
|
+
self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
|
|
123
117
|
|
|
124
|
-
def run_notebook(self, notebook_file, parameters, kernel=None):
|
|
118
|
+
def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
|
|
125
119
|
url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
|
|
126
120
|
os.environ["DERIVA_ML_WORKFLOW_URL"] = url
|
|
127
121
|
os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
|
|
@@ -133,6 +127,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
133
127
|
output_path=notebook_output,
|
|
134
128
|
parameters=parameters,
|
|
135
129
|
kernel_name=kernel,
|
|
130
|
+
log_output=log,
|
|
136
131
|
)
|
|
137
132
|
catalog_id = execution_rid = None
|
|
138
133
|
with Path(notebook_output).open("r") as f:
|
|
@@ -171,15 +166,13 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
171
166
|
|
|
172
167
|
def main():
|
|
173
168
|
"""Main entry point for the notebook runner CLI.
|
|
174
|
-
|
|
169
|
+
|
|
175
170
|
Creates and runs the DerivaMLRunNotebookCLI instance.
|
|
176
|
-
|
|
171
|
+
|
|
177
172
|
Returns:
|
|
178
173
|
None. Executes the CLI.
|
|
179
174
|
"""
|
|
180
|
-
cli = DerivaMLRunNotebookCLI(
|
|
181
|
-
description="Deriva ML Execution Script Demo", epilog=""
|
|
182
|
-
)
|
|
175
|
+
cli = DerivaMLRunNotebookCLI(description="Deriva ML Execution Script Demo", epilog="")
|
|
183
176
|
cli.main()
|
|
184
177
|
|
|
185
178
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
deriva_ml/__init__.py,sha256=_aMdxGG4mRTcXodLZLNpXqH8v5uqMbqFUryE9KqNSB8,1158
|
|
2
2
|
deriva_ml/demo_catalog.py,sha256=JjPAIac_hKPh5krEhGJydjXquRnivi7kQoR8W4Khp-s,14928
|
|
3
3
|
deriva_ml/feature.py,sha256=L1XUXLWGnUGCjkxX5KsGu0I8SaUTJG7eDs__yUCWuCY,8445
|
|
4
|
-
deriva_ml/run_notebook.py,sha256=
|
|
4
|
+
deriva_ml/run_notebook.py,sha256=BvLtkD_uizukqkAYoSUb1teIfuBWtKKcHSyJoAnAC4c,6496
|
|
5
5
|
deriva_ml/core/__init__.py,sha256=V_i90pc5PB1F4UdOO6DZWzpEFaZDTaPRU-EzKXQ19eI,787
|
|
6
|
-
deriva_ml/core/base.py,sha256
|
|
6
|
+
deriva_ml/core/base.py,sha256=-cV-mgADLwK2BAUxOauRB99GUDQpPS7qZP33DLXpRCY,61087
|
|
7
7
|
deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
|
|
8
8
|
deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
|
|
9
9
|
deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
|
|
@@ -20,7 +20,7 @@ deriva_ml/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
|
20
20
|
deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
|
|
21
21
|
deriva_ml/execution/execution.py,sha256=tXWkFLDoSre836x6MMkcmhtmr3zP5_VoSioQ72-XmvE,44298
|
|
22
22
|
deriva_ml/execution/execution_configuration.py,sha256=Rw4VWkBCZN9yatvSKdTqEWTfu470lpcVKfHFR0uN0jI,6248
|
|
23
|
-
deriva_ml/execution/workflow.py,sha256=
|
|
23
|
+
deriva_ml/execution/workflow.py,sha256=65z9p0u4EKduLykn-noWRgf14B1lRPeYJr_0SbqYlA4,13681
|
|
24
24
|
deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
deriva_ml/model/catalog.py,sha256=dzTBcRlqgEVkPY32AUax_iu75RgFiT4Pu5au7rmrv8k,14068
|
|
26
26
|
deriva_ml/model/database.py,sha256=MlXQQFgFmGxZbRx-unRFoRttXwpJspV4v2AIgppttCU,14805
|
|
@@ -32,9 +32,9 @@ deriva_ml/schema/create_schema.py,sha256=0ydJSZEg3C3-m8hWPN6k2MoUvm-RWxAlKFzVChx
|
|
|
32
32
|
deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
|
|
33
33
|
deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
|
|
34
34
|
deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
|
|
35
|
-
deriva_ml-1.14.
|
|
36
|
-
deriva_ml-1.14.
|
|
37
|
-
deriva_ml-1.14.
|
|
38
|
-
deriva_ml-1.14.
|
|
39
|
-
deriva_ml-1.14.
|
|
40
|
-
deriva_ml-1.14.
|
|
35
|
+
deriva_ml-1.14.30.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
36
|
+
deriva_ml-1.14.30.dist-info/METADATA,sha256=c3IN9kLi-Es1kMa6LAxJ-hgr6BqFnOPfw7gd6o_xvpM,1034
|
|
37
|
+
deriva_ml-1.14.30.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
38
|
+
deriva_ml-1.14.30.dist-info/entry_points.txt,sha256=dkf_z7E4V6_3_5Xjsm0hcixNg6ASHDw6NfYQuBvF1Wc,363
|
|
39
|
+
deriva_ml-1.14.30.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
|
|
40
|
+
deriva_ml-1.14.30.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|