deriva-ml 1.14.42__tar.gz → 1.14.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/PKG-INFO +3 -1
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/pyproject.toml +7 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/dataset.py +4 -4
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/upload.py +1 -1
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/execution.py +8 -4
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/workflow.py +1 -1
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/run_notebook.py +45 -7
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/create_schema.py +5 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/PKG-INFO +3 -1
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/SOURCES.txt +0 -1
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/requires.txt +2 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/execution/test_execution.py +3 -39
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/execution/workflow-test.ipynb +8 -2
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/uv.lock +676 -597
- deriva_ml-1.14.42/tests/dataset/test_dataset_export.py +0 -30
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/.github/release-drafter.yml +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/.github/workflows/publish-docs.yml +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/.github/workflows/release.yml +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/.gitignore +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/LICENSE +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/README.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/.DS_Store +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/Notebooks/DerivaML Features.ipynb +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/assets/ERD.png +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/assets/Launcher.png +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/assets/copy_minid.png +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/assets/deriva-logo.png +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/assets/deriva-ml.pdf +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/assets/sharing-at-home.pdf +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/dataset.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/dataset_aux_classes.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/dataset_bag.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/deriva_definitions.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/deriva_ml_base.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/deriva_model.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/execution.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/execution_configuration.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/feature.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/code-docs/upload.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/index.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/release-notes.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/datasets.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/deriva_ml_structure.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/execution-configuration.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/file-assets.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/identifiers.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/install.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/notebooks.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/docs/user-guide/overview.md +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/mkdocs.yml +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/setup.cfg +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/bump_version.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/base.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/constants.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/definitions.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/enums.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/ermrest.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/exceptions.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/core/filespec.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/aux_classes.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/dataset_bag.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/history.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/demo_catalog.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/environment.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/execution_configuration.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/feature.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/install_kernel.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/model/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/model/catalog.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/model/database.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/model/sql_mapper.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/annotations.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/check_schema.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/policy.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/table_comments_utils.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/entry_points.txt +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/top_level.txt +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/conftest.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/core/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/core/test_basic_tables.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/core/test_file.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/core/test_vocabulary.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/demo-catalog-schema.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/deriva-ml-reference.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/eye-ai-catalog-schema.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/test_dataset_version.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/test_datasets.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/dataset/test_download.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/execution/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/execution/workflow-test.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/feature/test_features.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/model/__init__.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/model/test_database.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/model/test_models.py +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/test-files/execution-parameters.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/test-files/notebook-parameters.json +0 -0
- {deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/test_utils.py +0 -0
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.14.42
|
|
3
|
+
Version: 1.14.44
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Requires-Dist: bump-my-version
|
|
10
|
+
Requires-Dist: bdbag
|
|
10
11
|
Requires-Dist: deriva~=1.7.10
|
|
11
12
|
Requires-Dist: deepdiff
|
|
13
|
+
Requires-Dist: nbconvert
|
|
12
14
|
Requires-Dist: pandas
|
|
13
15
|
Requires-Dist: regex~=2024.7.24
|
|
14
16
|
Requires-Dist: pydantic>=2.11
|
|
@@ -9,8 +9,10 @@ readme = "README.md"
|
|
|
9
9
|
requires-python = ">=3.10"
|
|
10
10
|
dependencies = [
|
|
11
11
|
"bump-my-version",
|
|
12
|
+
"bdbag",
|
|
12
13
|
"deriva~=1.7.10",
|
|
13
14
|
"deepdiff",
|
|
15
|
+
"nbconvert",
|
|
14
16
|
"pandas",
|
|
15
17
|
"regex~=2024.7.24",
|
|
16
18
|
"pydantic>=2.11",
|
|
@@ -40,6 +42,11 @@ build-backend = "setuptools.build_meta"
|
|
|
40
42
|
|
|
41
43
|
[tool.uv]
|
|
42
44
|
|
|
45
|
+
[tool.uv.sources]
|
|
46
|
+
#bdbag = {git = "https://github.com/fair-research/bdbag", branch = "master" }
|
|
47
|
+
#deriva = {git = "https://github.com/informatics-isi-edu/deriva-py", branch = "master" }
|
|
48
|
+
|
|
49
|
+
|
|
43
50
|
[tool.setuptools.package-data]
|
|
44
51
|
deriva_ml = ["schema/*.json"]
|
|
45
52
|
|
|
@@ -1289,8 +1289,8 @@ class Dataset:
|
|
|
1289
1289
|
{
|
|
1290
1290
|
"processor": "fetch",
|
|
1291
1291
|
"processor_params": {
|
|
1292
|
-
"query_path": f"/attribute/{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5",
|
|
1293
|
-
"output_path":
|
|
1292
|
+
"query_path": f"/attribute/{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5,asset_rid:=RID",
|
|
1293
|
+
"output_path": "asset/{asset_rid}/" + table.name,
|
|
1294
1294
|
},
|
|
1295
1295
|
}
|
|
1296
1296
|
)
|
|
@@ -1341,9 +1341,9 @@ class Dataset:
|
|
|
1341
1341
|
"source": {
|
|
1342
1342
|
"skip_root_path": False,
|
|
1343
1343
|
"api": "attribute",
|
|
1344
|
-
"path": f"{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5",
|
|
1344
|
+
"path": f"{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5, asset_rid:=RID",
|
|
1345
1345
|
},
|
|
1346
|
-
"destination": {"name":
|
|
1346
|
+
"destination": {"name": "asset/{asset_rid}/" + table.name, "type": "fetch"},
|
|
1347
1347
|
}
|
|
1348
1348
|
)
|
|
1349
1349
|
return exports
|
|
@@ -306,7 +306,7 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, Fil
|
|
|
306
306
|
)
|
|
307
307
|
try:
|
|
308
308
|
uploader.getUpdatedConfig()
|
|
309
|
-
uploader.scanDirectory(directory)
|
|
309
|
+
uploader.scanDirectory(directory, purge_state=True)
|
|
310
310
|
results = {
|
|
311
311
|
path: FileUploadState(
|
|
312
312
|
state=UploadState(result["State"]),
|
|
@@ -236,6 +236,7 @@ class Execution:
|
|
|
236
236
|
Raises:
|
|
237
237
|
DerivaMLException: If initialization fails or configuration is invalid.
|
|
238
238
|
"""
|
|
239
|
+
|
|
239
240
|
self.asset_paths: list[AssetFilePath] = []
|
|
240
241
|
self.configuration = configuration
|
|
241
242
|
self._ml_object = ml_object
|
|
@@ -378,6 +379,9 @@ class Execution:
|
|
|
378
379
|
# save runtime env
|
|
379
380
|
self._save_runtime_environment()
|
|
380
381
|
|
|
382
|
+
# Now upload the files so we have the info in case the execution fails.
|
|
383
|
+
self.uploaded_assets = self._upload_execution_dirs()
|
|
384
|
+
|
|
381
385
|
self.start_time = datetime.now()
|
|
382
386
|
self.update_status(Status.pending, "Initialize status finished.")
|
|
383
387
|
|
|
@@ -886,10 +890,10 @@ class Execution:
|
|
|
886
890
|
|
|
887
891
|
file_name = Path(file_name)
|
|
888
892
|
asset_path = asset_file_path(
|
|
889
|
-
self._working_dir,
|
|
890
|
-
self.execution_rid,
|
|
891
|
-
self._model.name_to_table(asset_name),
|
|
892
|
-
file_name.name,
|
|
893
|
+
prefix=self._working_dir,
|
|
894
|
+
exec_rid=self.execution_rid,
|
|
895
|
+
asset_table=self._model.name_to_table(asset_name),
|
|
896
|
+
file_name=file_name.name,
|
|
893
897
|
metadata=kwargs,
|
|
894
898
|
)
|
|
895
899
|
|
|
@@ -318,7 +318,7 @@ class Workflow(BaseModel):
|
|
|
318
318
|
]
|
|
319
319
|
# Get the caller's filename, which is two up the stack from here.
|
|
320
320
|
filename = Path(stack[-1])
|
|
321
|
-
if not (filename.exists() or Workflow._in_repl()):
|
|
321
|
+
if not (filename.exists()) or Workflow._in_repl():
|
|
322
322
|
# Being called from the command line interpreter.
|
|
323
323
|
filename = Path.cwd() / Path("REPL")
|
|
324
324
|
# Get the caller's filename, which is two up the stack from here.
|
|
@@ -6,11 +6,13 @@ import tempfile
|
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
|
|
9
|
+
import nbformat
|
|
9
10
|
import papermill as pm
|
|
10
11
|
import regex as re
|
|
11
12
|
from deriva.core import BaseCLI
|
|
13
|
+
from nbconvert import MarkdownExporter
|
|
12
14
|
|
|
13
|
-
from deriva_ml import DerivaML, ExecAssetType, MLAsset, Workflow
|
|
15
|
+
from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class DerivaMLRunNotebookCLI(BaseCLI):
|
|
@@ -120,8 +122,8 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
120
122
|
os.environ["DERIVA_ML_WORKFLOW_URL"] = url
|
|
121
123
|
os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
|
|
122
124
|
os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
|
|
123
|
-
|
|
124
125
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
126
|
+
print(f"Running notebook {notebook_file.name} with parameters:")
|
|
125
127
|
notebook_output = Path(tmpdirname) / Path(notebook_file).name
|
|
126
128
|
pm.execute_notebook(
|
|
127
129
|
input_path=notebook_file,
|
|
@@ -130,6 +132,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
130
132
|
kernel_name=kernel,
|
|
131
133
|
log_output=log,
|
|
132
134
|
)
|
|
135
|
+
print(f"Notebook output saved to {notebook_output}")
|
|
133
136
|
catalog_id = execution_rid = None
|
|
134
137
|
with Path(notebook_output).open("r") as f:
|
|
135
138
|
for line in f:
|
|
@@ -143,25 +146,56 @@ class DerivaMLRunNotebookCLI(BaseCLI):
|
|
|
143
146
|
if not execution_rid:
|
|
144
147
|
print("Execution RID not found.")
|
|
145
148
|
exit(1)
|
|
146
|
-
print("Uploaded notebook output for Execution RID:", execution_rid)
|
|
147
149
|
|
|
148
|
-
ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id)
|
|
150
|
+
ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
|
|
151
|
+
workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
|
|
152
|
+
|
|
153
|
+
execution = Execution(
|
|
154
|
+
configuration=ExecutionConfiguration(workflow=workflow_rid),
|
|
155
|
+
ml_object=ml_instance,
|
|
156
|
+
reload=execution_rid,
|
|
157
|
+
)
|
|
158
|
+
|
|
159
|
+
# Generate a Markdown version of the output notebook.
|
|
160
|
+
notebook_output_md = notebook_output.with_suffix(".md")
|
|
161
|
+
with notebook_output.open() as f:
|
|
162
|
+
nb = nbformat.read(f, as_version=4)
|
|
163
|
+
# Convert to Markdown
|
|
164
|
+
exporter = MarkdownExporter()
|
|
165
|
+
(body, resources) = exporter.from_notebook_node(nb)
|
|
166
|
+
|
|
167
|
+
with notebook_output_md.open("w") as f:
|
|
168
|
+
f.write(body)
|
|
169
|
+
nb = nbformat.read(notebook_output, as_version=4)
|
|
149
170
|
|
|
150
|
-
execution = ml_instance.restore_execution(execution_rid)
|
|
151
171
|
execution.asset_file_path(
|
|
152
172
|
asset_name=MLAsset.execution_asset,
|
|
153
173
|
file_name=notebook_output,
|
|
154
174
|
asset_types=ExecAssetType.notebook_output,
|
|
155
175
|
)
|
|
176
|
+
|
|
177
|
+
execution.asset_file_path(
|
|
178
|
+
asset_name=MLAsset.execution_asset,
|
|
179
|
+
file_name=notebook_output_md,
|
|
180
|
+
asset_types=ExecAssetType.notebook_output,
|
|
181
|
+
)
|
|
182
|
+
execution.asset_file_path(
|
|
183
|
+
asset_name=MLAsset.execution_asset,
|
|
184
|
+
file_name=notebook_output_md,
|
|
185
|
+
asset_types=ExecAssetType.notebook_output,
|
|
186
|
+
)
|
|
187
|
+
print("parameter....")
|
|
188
|
+
|
|
156
189
|
parameter_file = execution.asset_file_path(
|
|
157
190
|
asset_name=MLAsset.execution_asset,
|
|
158
191
|
file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
|
|
159
192
|
asset_types=ExecAssetType.input_file.value,
|
|
160
193
|
)
|
|
194
|
+
|
|
161
195
|
with Path(parameter_file).open("w") as f:
|
|
162
196
|
json.dump(parameters, f)
|
|
163
|
-
|
|
164
197
|
execution.upload_execution_outputs()
|
|
198
|
+
|
|
165
199
|
print(ml_instance.cite(execution_rid))
|
|
166
200
|
|
|
167
201
|
|
|
@@ -178,4 +212,8 @@ def main():
|
|
|
178
212
|
|
|
179
213
|
|
|
180
214
|
if __name__ == "__main__":
|
|
181
|
-
|
|
215
|
+
try:
|
|
216
|
+
main()
|
|
217
|
+
except Exception as e:
|
|
218
|
+
print(e)
|
|
219
|
+
exit(1)
|
|
@@ -309,7 +309,12 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
|
|
|
309
309
|
"Description": "A file generated by an execution",
|
|
310
310
|
},
|
|
311
311
|
{"Name": "File", "Description": "A file that is not managed by Hatrac"},
|
|
312
|
+
{"Name": "Input_File", "Description": "A file input to an execution."},
|
|
312
313
|
{"Name": "Model_File", "Description": "The ML model."},
|
|
314
|
+
{
|
|
315
|
+
"Name": "Notebook_Output",
|
|
316
|
+
"Description": "A Jupyter notebook with output cells filled from an execution.",
|
|
317
|
+
},
|
|
313
318
|
],
|
|
314
319
|
defaults={"ID", "URI"},
|
|
315
320
|
)
|
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.14.42
|
|
3
|
+
Version: 1.14.44
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
9
|
Requires-Dist: bump-my-version
|
|
10
|
+
Requires-Dist: bdbag
|
|
10
11
|
Requires-Dist: deriva~=1.7.10
|
|
11
12
|
Requires-Dist: deepdiff
|
|
13
|
+
Requires-Dist: nbconvert
|
|
12
14
|
Requires-Dist: pandas
|
|
13
15
|
Requires-Dist: regex~=2024.7.24
|
|
14
16
|
Requires-Dist: pydantic>=2.11
|
|
@@ -93,7 +93,6 @@ tests/dataset/__init__.py
|
|
|
93
93
|
tests/dataset/demo-catalog-schema.json
|
|
94
94
|
tests/dataset/deriva-ml-reference.json
|
|
95
95
|
tests/dataset/eye-ai-catalog-schema.json
|
|
96
|
-
tests/dataset/test_dataset_export.py
|
|
97
96
|
tests/dataset/test_dataset_version.py
|
|
98
97
|
tests/dataset/test_datasets.py
|
|
99
98
|
tests/dataset/test_download.py
|
|
@@ -7,8 +7,6 @@ from pathlib import Path
|
|
|
7
7
|
from tempfile import TemporaryDirectory
|
|
8
8
|
|
|
9
9
|
from deriva_ml import (
|
|
10
|
-
BuiltinTypes,
|
|
11
|
-
ColumnDefinition,
|
|
12
10
|
DatasetSpec,
|
|
13
11
|
DerivaML,
|
|
14
12
|
ExecAssetType,
|
|
@@ -101,6 +99,9 @@ class TestWorkflow:
|
|
|
101
99
|
workflow_url = workflows[0]["URL"]
|
|
102
100
|
assert workflow_url.endswith("workflow-test.ipynb")
|
|
103
101
|
|
|
102
|
+
# Check to make sure that the result notebook and its Markdown version got uploaded.
|
|
103
|
+
execution_assets = ml_instance.list_assets("Execution_Asset")
|
|
104
|
+
|
|
104
105
|
|
|
105
106
|
class TestExecution:
|
|
106
107
|
def test_execution_no_download(self, test_ml):
|
|
@@ -193,43 +194,6 @@ class TestExecution:
|
|
|
193
194
|
assert 1 == len(execution.datasets)
|
|
194
195
|
assert execution.datasets[0].dataset_rid == dataset_rid
|
|
195
196
|
|
|
196
|
-
def test_download_asset(self, test_ml, tmp_path):
|
|
197
|
-
ml_instance = test_ml
|
|
198
|
-
|
|
199
|
-
# Create a workflow
|
|
200
|
-
ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
|
|
201
|
-
ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
|
|
202
|
-
api_workflow = ml_instance.create_workflow(
|
|
203
|
-
name="Test Workflow One",
|
|
204
|
-
workflow_type="Test Workflow",
|
|
205
|
-
description="A test operation",
|
|
206
|
-
)
|
|
207
|
-
ml_instance.create_asset(
|
|
208
|
-
"BarAsset",
|
|
209
|
-
column_defs=[ColumnDefinition(name="foo", type=BuiltinTypes.int4)],
|
|
210
|
-
)
|
|
211
|
-
manual_execution = ml_instance.create_execution(
|
|
212
|
-
ExecutionConfiguration(description="Sample Execution", workflow=api_workflow)
|
|
213
|
-
)
|
|
214
|
-
|
|
215
|
-
with manual_execution.execute() as execution:
|
|
216
|
-
model_file = execution.asset_file_path(
|
|
217
|
-
"BarAsset", "API_Model/modelfile.txt", asset_types=ExecAssetType.model_file, foo=23
|
|
218
|
-
)
|
|
219
|
-
print(model_file)
|
|
220
|
-
with model_file.open("w") as fp:
|
|
221
|
-
fp.write("My model")
|
|
222
|
-
# Now upload the file and retrieve the RID of the new asset from the returned results.
|
|
223
|
-
uploaded_assets = manual_execution.upload_execution_outputs()
|
|
224
|
-
print(uploaded_assets)
|
|
225
|
-
assert 1 == len(uploaded_assets["deriva-ml/Execution_Asset"])
|
|
226
|
-
|
|
227
|
-
file = manual_execution.download_asset(asset_rid, tmpdir, update_catalog=False)
|
|
228
|
-
assert file.name == "modelfile.txt"
|
|
229
|
-
|
|
230
|
-
assert "BarAsset" in [a.name for a in ml_instance.model.find_assets()]
|
|
231
|
-
assert ml_instance.model.asset_metadata("BarAsset") == {"foo"}
|
|
232
|
-
|
|
233
197
|
@staticmethod
|
|
234
198
|
def create_execution_asset(ml_instance: DerivaML, api_workflow):
|
|
235
199
|
manual_execution = ml_instance.create_execution(
|
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
"metadata": {},
|
|
18
18
|
"outputs": [],
|
|
19
19
|
"source": [
|
|
20
|
-
"from deriva_ml import DerivaML, MLVocab as vc\n",
|
|
20
|
+
"from deriva_ml import DerivaML, MLVocab as vc, ExecutionConfiguration\n",
|
|
21
21
|
"import os"
|
|
22
22
|
]
|
|
23
23
|
},
|
|
@@ -66,7 +66,13 @@
|
|
|
66
66
|
" workflow_type=\"Test Workflow\",\n",
|
|
67
67
|
" description=\"A test operation\",\n",
|
|
68
68
|
" )\n",
|
|
69
|
-
"rid = ml_instance.add_workflow(api_workflow)"
|
|
69
|
+
"rid = ml_instance.add_workflow(api_workflow)\n",
|
|
70
|
+
"\n",
|
|
71
|
+
"execution_config = ExecutionConfiguration(description=\"Sample Execution\", workflow=api_workflow)\n",
|
|
72
|
+
"execution = ml_instance.create_execution(execution_config)\n",
|
|
73
|
+
"with execution.execute() as e:\n",
|
|
74
|
+
" pass\n",
|
|
75
|
+
"execution.upload_execution_outputs()\n"
|
|
70
76
|
]
|
|
71
77
|
}
|
|
72
78
|
],
|