PyPI - deriva-ml - Versions diffs - 1.14.42__tar.gz → 1.14.44__tar.gz - Mend

deriva-ml 1.14.42__tar.gz → 1.14.44__tar.gz

This diff compares the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (111) hide show

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/PKG-INFO RENAMED Viewed

@@ -1,14 +1,16 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.14.42
+Version: 1.14.44
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: bump-my-version
+Requires-Dist: bdbag
 Requires-Dist: deriva~=1.7.10
 Requires-Dist: deepdiff
+Requires-Dist: nbconvert
 Requires-Dist: pandas
 Requires-Dist: regex~=2024.7.24
 Requires-Dist: pydantic>=2.11

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/pyproject.toml RENAMED Viewed

@@ -9,8 +9,10 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "bump-my-version",
+    "bdbag",
     "deriva~=1.7.10",
     "deepdiff",
+    "nbconvert",
     "pandas",
     "regex~=2024.7.24",
     "pydantic>=2.11",
@@ -40,6 +42,11 @@ build-backend = "setuptools.build_meta"
 [tool.uv]
+[tool.uv.sources]
+#bdbag = {git = "https://github.com/fair-research/bdbag", branch = "master" }
+#deriva = {git = "https://github.com/informatics-isi-edu/deriva-py", branch = "master" }
 [tool.setuptools.package-data]
 deriva_ml = ["schema/*.json"]

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/dataset.py RENAMED Viewed

@@ -1289,8 +1289,8 @@ class Dataset:
                 {
                     "processor": "fetch",
                     "processor_params": {
-                        "query_path": f"/attribute/{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5",
-                        "output_path": f"asset/{table.name}",
+                        "query_path": f"/attribute/{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5,asset_rid:=RID",
+                        "output_path": "asset/{asset_rid}/" + table.name,
                     },
                 }
             )
@@ -1341,9 +1341,9 @@ class Dataset:
                     "source": {
                         "skip_root_path": False,
                         "api": "attribute",
-                        "path": f"{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5",
+                        "path": f"{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5, asset_rid:=RID",
                     },
-                    "destination": {"name": f"asset/{table.name}", "type": "fetch"},
+                    "destination": {"name": "asset/{asset_rid}/" + table.name, "type": "fetch"},
                 }
             )
         return exports

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/dataset/upload.py RENAMED Viewed

@@ -306,7 +306,7 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, Fil
         )
         try:
             uploader.getUpdatedConfig()
-            uploader.scanDirectory(directory)
+            uploader.scanDirectory(directory, purge_state=True)
             results = {
                 path: FileUploadState(
                     state=UploadState(result["State"]),

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/execution.py RENAMED Viewed

@@ -236,6 +236,7 @@ class Execution:
         Raises:
             DerivaMLException: If initialization fails or configuration is invalid.
         """
         self.asset_paths: list[AssetFilePath] = []
         self.configuration = configuration
         self._ml_object = ml_object
@@ -378,6 +379,9 @@ class Execution:
             # save runtime env
             self._save_runtime_environment()
+            # Now upload the files so we have the info in case the execution fails.
+            self.uploaded_assets = self._upload_execution_dirs()
         self.start_time = datetime.now()
         self.update_status(Status.pending, "Initialize status finished.")
@@ -886,10 +890,10 @@ class Execution:
         file_name = Path(file_name)
         asset_path = asset_file_path(
-            self._working_dir,
-            self.execution_rid,
-            self._model.name_to_table(asset_name),
-            file_name.name,
+            prefix=self._working_dir,
+            exec_rid=self.execution_rid,
+            asset_table=self._model.name_to_table(asset_name),
+            file_name=file_name.name,
             metadata=kwargs,
         )

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/execution/workflow.py RENAMED Viewed

@@ -318,7 +318,7 @@ class Workflow(BaseModel):
             ]
             # Get the caller's filename, which is two up the stack from here.
             filename = Path(stack[-1])
-            if not (filename.exists() or Workflow._in_repl()):
+            if not (filename.exists()) or Workflow._in_repl():
                 # Being called from the command line interpreter.
                 filename = Path.cwd() / Path("REPL")
             # Get the caller's filename, which is two up the stack from here.

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/run_notebook.py RENAMED Viewed

@@ -6,11 +6,13 @@ import tempfile
 from datetime import datetime
 from pathlib import Path
+import nbformat
 import papermill as pm
 import regex as re
 from deriva.core import BaseCLI
+from nbconvert import MarkdownExporter
-from deriva_ml import DerivaML, ExecAssetType, MLAsset, Workflow
+from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
 class DerivaMLRunNotebookCLI(BaseCLI):
@@ -120,8 +122,8 @@ class DerivaMLRunNotebookCLI(BaseCLI):
         os.environ["DERIVA_ML_WORKFLOW_URL"] = url
         os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
         os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
         with tempfile.TemporaryDirectory() as tmpdirname:
+            print(f"Running notebook {notebook_file.name} with parameters:")
             notebook_output = Path(tmpdirname) / Path(notebook_file).name
             pm.execute_notebook(
                 input_path=notebook_file,
@@ -130,6 +132,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
                 kernel_name=kernel,
                 log_output=log,
             )
+            print(f"Notebook output saved to {notebook_output}")
             catalog_id = execution_rid = None
             with Path(notebook_output).open("r") as f:
                 for line in f:
@@ -143,25 +146,56 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             if not execution_rid:
                 print("Execution RID not found.")
                 exit(1)
-            print("Uploaded notebook output for Execution RID:", execution_rid)
-            ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id)
+            ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
+            workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
+            execution = Execution(
+                configuration=ExecutionConfiguration(workflow=workflow_rid),
+                ml_object=ml_instance,
+                reload=execution_rid,
+            )
+            # Generate an HTML version of the output notebook.
+            notebook_output_md = notebook_output.with_suffix(".md")
+            with notebook_output.open() as f:
+                nb = nbformat.read(f, as_version=4)
+            # Convert to Markdown
+            exporter = MarkdownExporter()
+            (body, resources) = exporter.from_notebook_node(nb)
+            with notebook_output_md.open("w") as f:
+                f.write(body)
+            nb = nbformat.read(notebook_output, as_version=4)
-            execution = ml_instance.restore_execution(execution_rid)
             execution.asset_file_path(
                 asset_name=MLAsset.execution_asset,
                 file_name=notebook_output,
                 asset_types=ExecAssetType.notebook_output,
             )
+            execution.asset_file_path(
+                asset_name=MLAsset.execution_asset,
+                file_name=notebook_output_md,
+                asset_types=ExecAssetType.notebook_output,
+            )
+            execution.asset_file_path(
+                asset_name=MLAsset.execution_asset,
+                file_name=notebook_output_md,
+                asset_types=ExecAssetType.notebook_output,
+            )
+            print("parameter....")
             parameter_file = execution.asset_file_path(
                 asset_name=MLAsset.execution_asset,
                 file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
                 asset_types=ExecAssetType.input_file.value,
             )
             with Path(parameter_file).open("w") as f:
                 json.dump(parameters, f)
             execution.upload_execution_outputs()
             print(ml_instance.cite(execution_rid))
@@ -178,4 +212,8 @@ def main():
 if __name__ == "__main__":
-    main()
+    try:
+        main()
+    except Exception as e:
+        print(e)
+        exit(1)

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml/schema/create_schema.py RENAMED Viewed

@@ -309,7 +309,12 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
                 "Description": "A file generated by an execution",
             },
             {"Name": "File", "Description": "A file that is not managed by Hatrac"},
+            {"Name": "Input_File", "Description": "A file input to an execution."},
             {"Name": "Model_File", "Description": "The ML model."},
+            {
+                "Name": "Notebook_Output",
+                "Description": "A Jupyter notebook with output cells filled from an execution.",
+            },
         ],
         defaults={"ID", "URI"},
     )

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/PKG-INFO RENAMED Viewed

@@ -1,14 +1,16 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.14.42
+Version: 1.14.44
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: bump-my-version
+Requires-Dist: bdbag
 Requires-Dist: deriva~=1.7.10
 Requires-Dist: deepdiff
+Requires-Dist: nbconvert
 Requires-Dist: pandas
 Requires-Dist: regex~=2024.7.24
 Requires-Dist: pydantic>=2.11

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/SOURCES.txt RENAMED Viewed

@@ -93,7 +93,6 @@ tests/dataset/__init__.py
 tests/dataset/demo-catalog-schema.json
 tests/dataset/deriva-ml-reference.json
 tests/dataset/eye-ai-catalog-schema.json
-tests/dataset/test_dataset_export.py
 tests/dataset/test_dataset_version.py
 tests/dataset/test_datasets.py
 tests/dataset/test_download.py

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/src/deriva_ml.egg-info/requires.txt RENAMED Viewed

@@ -1,6 +1,8 @@
 bump-my-version
+bdbag
 deriva~=1.7.10
 deepdiff
+nbconvert
 pandas
 regex~=2024.7.24
 pydantic>=2.11

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/execution/test_execution.py RENAMED Viewed

@@ -7,8 +7,6 @@ from pathlib import Path
 from tempfile import TemporaryDirectory
 from deriva_ml import (
-    BuiltinTypes,
-    ColumnDefinition,
     DatasetSpec,
     DerivaML,
     ExecAssetType,
@@ -101,6 +99,9 @@ class TestWorkflow:
         workflow_url = workflows[0]["URL"]
         assert workflow_url.endswith("workflow-test.ipynb")
+        # Check to make sure that result notebook and HTML version got uploaded.
+        execution_assets = ml_instance.list_assets("Execution_Asset")
 class TestExecution:
     def test_execution_no_download(self, test_ml):
@@ -193,43 +194,6 @@ class TestExecution:
             assert 1 == len(execution.datasets)
             assert execution.datasets[0].dataset_rid == dataset_rid
-    def test_download_asset(self, test_ml, tmp_path):
-        ml_instance = test_ml
-        # Create a workflow
-        ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
-        ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
-        api_workflow = ml_instance.create_workflow(
-            name="Test Workflow One",
-            workflow_type="Test Workflow",
-            description="A test operation",
-        )
-        ml_instance.create_asset(
-            "BarAsset",
-            column_defs=[ColumnDefinition(name="foo", type=BuiltinTypes.int4)],
-        )
-        manual_execution = ml_instance.create_execution(
-            ExecutionConfiguration(description="Sample Execution", workflow=api_workflow)
-        )
-        with manual_execution.execute() as execution:
-            model_file = execution.asset_file_path(
-                "BarAsset", "API_Model/modelfile.txt", asset_types=ExecAssetType.model_file, foo=23
-            )
-            print(model_file)
-            with model_file.open("w") as fp:
-                fp.write("My model")
-            # Now upload the file and retrieve the RID of the new asset from the returned results.
-        uploaded_assets = manual_execution.upload_execution_outputs()
-        print(uploaded_assets)
-        assert 1 == len(uploaded_assets["deriva-ml/Execution_Asset"])
-        file = manual_execution.download_asset(asset_rid, tmpdir, update_catalog=False)
-        assert file.name == "modelfile.txt"
-        assert "BarAsset" in [a.name for a in ml_instance.model.find_assets()]
-        assert ml_instance.model.asset_metadata("BarAsset") == {"foo"}
     @staticmethod
     def create_execution_asset(ml_instance: DerivaML, api_workflow):
         manual_execution = ml_instance.create_execution(

{deriva_ml-1.14.42 → deriva_ml-1.14.44}/tests/execution/workflow-test.ipynb RENAMED Viewed

@@ -17,7 +17,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from deriva_ml import DerivaML, MLVocab as vc\n",
+    "from deriva_ml import DerivaML, MLVocab as vc, ExecutionConfiguration\n",
     "import os"
    ]
   },
@@ -66,7 +66,13 @@
     "     workflow_type=\"Test Workflow\",\n",
     "     description=\"A test operation\",\n",
     " )\n",
-    "rid = ml_instance.add_workflow(api_workflow)"
+    "rid = ml_instance.add_workflow(api_workflow)\n",
+    "\n",
+    "execution_config = ExecutionConfiguration(description=\"Sample Execution\", workflow=api_workflow)\n",
+    "execution = ml_instance.create_execution(execution_config)\n",
+    "with execution.execute() as e:\n",
+    "    pass\n",
+    "execution.upload_execution_outputs()\n"
    ]
   }
  ],

deriva-ml 1.14.42__tar.gz → 1.14.44__tar.gz

deriva-ml 1.14.42__tar.gz → 1.14.44__tar.gz