deriva-ml 1.14.42__py3-none-any.whl → 1.14.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1289,8 +1289,8 @@ class Dataset:
1289
1289
  {
1290
1290
  "processor": "fetch",
1291
1291
  "processor_params": {
1292
- "query_path": f"/attribute/{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5",
1293
- "output_path": f"asset/{table.name}",
1292
+ "query_path": f"/attribute/{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5,asset_rid:=RID",
1293
+ "output_path": "asset/{asset_rid}/" + table.name,
1294
1294
  },
1295
1295
  }
1296
1296
  )
@@ -1341,9 +1341,9 @@ class Dataset:
1341
1341
  "source": {
1342
1342
  "skip_root_path": False,
1343
1343
  "api": "attribute",
1344
- "path": f"{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5",
1344
+ "path": f"{spath}/!(URL::null::)/url:=URL,length:=Length,filename:=Filename,md5:=MD5, asset_rid:=RID",
1345
1345
  },
1346
- "destination": {"name": f"asset/{table.name}", "type": "fetch"},
1346
+ "destination": {"name": "asset/{asset_rid}/" + table.name, "type": "fetch"},
1347
1347
  }
1348
1348
  )
1349
1349
  return exports
@@ -306,7 +306,7 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, Fil
306
306
  )
307
307
  try:
308
308
  uploader.getUpdatedConfig()
309
- uploader.scanDirectory(directory)
309
+ uploader.scanDirectory(directory, purge_state=True)
310
310
  results = {
311
311
  path: FileUploadState(
312
312
  state=UploadState(result["State"]),
@@ -236,6 +236,7 @@ class Execution:
236
236
  Raises:
237
237
  DerivaMLException: If initialization fails or configuration is invalid.
238
238
  """
239
+
239
240
  self.asset_paths: list[AssetFilePath] = []
240
241
  self.configuration = configuration
241
242
  self._ml_object = ml_object
@@ -378,6 +379,9 @@ class Execution:
378
379
  # save runtime env
379
380
  self._save_runtime_environment()
380
381
 
382
+ # Now upload the files so we have the info in case the execution fails.
383
+ self.uploaded_assets = self._upload_execution_dirs()
384
+
381
385
  self.start_time = datetime.now()
382
386
  self.update_status(Status.pending, "Initialize status finished.")
383
387
 
@@ -886,10 +890,10 @@ class Execution:
886
890
 
887
891
  file_name = Path(file_name)
888
892
  asset_path = asset_file_path(
889
- self._working_dir,
890
- self.execution_rid,
891
- self._model.name_to_table(asset_name),
892
- file_name.name,
893
+ prefix=self._working_dir,
894
+ exec_rid=self.execution_rid,
895
+ asset_table=self._model.name_to_table(asset_name),
896
+ file_name=file_name.name,
893
897
  metadata=kwargs,
894
898
  )
895
899
 
@@ -318,7 +318,7 @@ class Workflow(BaseModel):
318
318
  ]
319
319
  # Get the caller's filename, which is two up the stack from here.
320
320
  filename = Path(stack[-1])
321
- if not (filename.exists() or Workflow._in_repl()):
321
+ if not (filename.exists()) or Workflow._in_repl():
322
322
  # Being called from the command line interpreter.
323
323
  filename = Path.cwd() / Path("REPL")
324
324
  # Get the caller's filename, which is two up the stack from here.
deriva_ml/run_notebook.py CHANGED
@@ -6,11 +6,13 @@ import tempfile
6
6
  from datetime import datetime
7
7
  from pathlib import Path
8
8
 
9
+ import nbformat
9
10
  import papermill as pm
10
11
  import regex as re
11
12
  from deriva.core import BaseCLI
13
+ from nbconvert import MarkdownExporter
12
14
 
13
- from deriva_ml import DerivaML, ExecAssetType, MLAsset, Workflow
15
+ from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
14
16
 
15
17
 
16
18
  class DerivaMLRunNotebookCLI(BaseCLI):
@@ -120,8 +122,8 @@ class DerivaMLRunNotebookCLI(BaseCLI):
120
122
  os.environ["DERIVA_ML_WORKFLOW_URL"] = url
121
123
  os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
122
124
  os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
123
-
124
125
  with tempfile.TemporaryDirectory() as tmpdirname:
126
+ print(f"Running notebook {notebook_file.name} with parameters:")
125
127
  notebook_output = Path(tmpdirname) / Path(notebook_file).name
126
128
  pm.execute_notebook(
127
129
  input_path=notebook_file,
@@ -130,6 +132,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
130
132
  kernel_name=kernel,
131
133
  log_output=log,
132
134
  )
135
+ print(f"Notebook output saved to {notebook_output}")
133
136
  catalog_id = execution_rid = None
134
137
  with Path(notebook_output).open("r") as f:
135
138
  for line in f:
@@ -143,25 +146,56 @@ class DerivaMLRunNotebookCLI(BaseCLI):
143
146
  if not execution_rid:
144
147
  print("Execution RID not found.")
145
148
  exit(1)
146
- print("Uploaded notebook output for Execution RID:", execution_rid)
147
149
 
148
- ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id)
150
+ ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
151
+ workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
152
+
153
+ execution = Execution(
154
+ configuration=ExecutionConfiguration(workflow=workflow_rid),
155
+ ml_object=ml_instance,
156
+ reload=execution_rid,
157
+ )
158
+
159
+ # Generate a Markdown version of the output notebook.
160
+ notebook_output_md = notebook_output.with_suffix(".md")
161
+ with notebook_output.open() as f:
162
+ nb = nbformat.read(f, as_version=4)
163
+ # Convert to Markdown
164
+ exporter = MarkdownExporter()
165
+ (body, resources) = exporter.from_notebook_node(nb)
166
+
167
+ with notebook_output_md.open("w") as f:
168
+ f.write(body)
169
+ nb = nbformat.read(notebook_output, as_version=4)
149
170
 
150
- execution = ml_instance.restore_execution(execution_rid)
151
171
  execution.asset_file_path(
152
172
  asset_name=MLAsset.execution_asset,
153
173
  file_name=notebook_output,
154
174
  asset_types=ExecAssetType.notebook_output,
155
175
  )
176
+
177
+ execution.asset_file_path(
178
+ asset_name=MLAsset.execution_asset,
179
+ file_name=notebook_output_md,
180
+ asset_types=ExecAssetType.notebook_output,
181
+ )
182
+ execution.asset_file_path(
183
+ asset_name=MLAsset.execution_asset,
184
+ file_name=notebook_output_md,
185
+ asset_types=ExecAssetType.notebook_output,
186
+ )
187
+ print("parameter....")
188
+
156
189
  parameter_file = execution.asset_file_path(
157
190
  asset_name=MLAsset.execution_asset,
158
191
  file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
159
192
  asset_types=ExecAssetType.input_file.value,
160
193
  )
194
+
161
195
  with Path(parameter_file).open("w") as f:
162
196
  json.dump(parameters, f)
163
-
164
197
  execution.upload_execution_outputs()
198
+
165
199
  print(ml_instance.cite(execution_rid))
166
200
 
167
201
 
@@ -178,4 +212,8 @@ def main():
178
212
 
179
213
 
180
214
  if __name__ == "__main__":
181
- main()
215
+ try:
216
+ main()
217
+ except Exception as e:
218
+ print(e)
219
+ exit(1)
@@ -309,7 +309,12 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
309
309
  "Description": "A file generated by an execution",
310
310
  },
311
311
  {"Name": "File", "Description": "A file that is not managed by Hatrac"},
312
+ {"Name": "Input_File", "Description": "A file input to an execution."},
312
313
  {"Name": "Model_File", "Description": "The ML model."},
314
+ {
315
+ "Name": "Notebook_Output",
316
+ "Description": "A Jupyter notebook with output cells filled from an execution.",
317
+ },
313
318
  ],
314
319
  defaults={"ID", "URI"},
315
320
  )
@@ -1,14 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.42
3
+ Version: 1.14.44
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: bump-my-version
10
+ Requires-Dist: bdbag
10
11
  Requires-Dist: deriva~=1.7.10
11
12
  Requires-Dist: deepdiff
13
+ Requires-Dist: nbconvert
12
14
  Requires-Dist: pandas
13
15
  Requires-Dist: regex~=2024.7.24
14
16
  Requires-Dist: pydantic>=2.11
@@ -3,7 +3,7 @@ deriva_ml/bump_version.py,sha256=KpHmkpEztly2QHYL4dyaIGdEMyP4F0D89rawyh5EDTs,398
3
3
  deriva_ml/demo_catalog.py,sha256=JjPAIac_hKPh5krEhGJydjXquRnivi7kQoR8W4Khp-s,14928
4
4
  deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
5
5
  deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
6
- deriva_ml/run_notebook.py,sha256=QDeYKv8Ug9lx36e-CMogDu1Y4omOFlkTXvMxLriulJQ,6604
6
+ deriva_ml/run_notebook.py,sha256=U3Bz-PdYYMIo_KZwDwlyKJtnZEMKwJjLh5Jn-M8ChUI,8039
7
7
  deriva_ml/core/__init__.py,sha256=V_i90pc5PB1F4UdOO6DZWzpEFaZDTaPRU-EzKXQ19eI,787
8
8
  deriva_ml/core/base.py,sha256=5H0GEMlKXUVRXBYXJQ25kxdscB6uX1wIKSUAM_CBOhI,61228
9
9
  deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
@@ -14,15 +14,15 @@ deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,
14
14
  deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
15
15
  deriva_ml/dataset/__init__.py,sha256=ukl2laJqa9J2AVqb4zlpIYc-3RaAlfRR33NMIQaoNrQ,104
16
16
  deriva_ml/dataset/aux_classes.py,sha256=9mZAln7_rrzaRbKhKA6dJOp3xeD6dHOC9NXOtJKROo4,6933
17
- deriva_ml/dataset/dataset.py,sha256=drL0YZovPpQC9G_eYe-fWuFQFysK2X-ad0vL4ZsYkf0,64396
17
+ deriva_ml/dataset/dataset.py,sha256=AU27ZtzDSpCodtbq9T-8AtqiA-x8r78wQvFBOCgaqsQ,64451
18
18
  deriva_ml/dataset/dataset_bag.py,sha256=mPIZRX5aTbVRcJbCFtdkmlnexquF8NE-onbVK_8IxVk,14224
19
19
  deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
20
- deriva_ml/dataset/upload.py,sha256=Ad5JDfGvkIvefE-plP8SN9pNAxHzYrBoid5isz_bnNs,16411
20
+ deriva_ml/dataset/upload.py,sha256=i_7KLfRSd2-THqZ1aG2OFAFGoyb8dJBCZZ5t1ftrtMQ,16429
21
21
  deriva_ml/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
23
- deriva_ml/execution/execution.py,sha256=NJT4qzZvvBXAlh73NVM39VE-uinSVBu2mHtuZD35G1M,44591
23
+ deriva_ml/execution/execution.py,sha256=JH2alK-7DoRzfGvjsrNhD_tZqoZkb736bxbbgXDP33o,44780
24
24
  deriva_ml/execution/execution_configuration.py,sha256=Rw4VWkBCZN9yatvSKdTqEWTfu470lpcVKfHFR0uN0jI,6248
25
- deriva_ml/execution/workflow.py,sha256=E5GkE1m76eWvL1_i88h8sBkNMNJEPH-3QXyqNSzSoec,13604
25
+ deriva_ml/execution/workflow.py,sha256=7CwPrgs3FKQHiEVus0PpK9w5hVKLKZnCrlu_nT8GFe8,13604
26
26
  deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  deriva_ml/model/catalog.py,sha256=dzTBcRlqgEVkPY32AUax_iu75RgFiT4Pu5au7rmrv8k,14068
28
28
  deriva_ml/model/database.py,sha256=SBkYFf0qwbGmvL0Xtn_n5DCz4roGfrhuYrM8G69Cy9Y,14837
@@ -30,13 +30,13 @@ deriva_ml/model/sql_mapper.py,sha256=_0QsJEVSgSPtxrWKSgjfPZCQ1aMVcjR_Tk2OxLhWEvY
30
30
  deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
31
31
  deriva_ml/schema/annotations.py,sha256=TuQ3vWFnK0160fRmtvsCkHx9qAcRa63MSyERB4x5a98,18197
32
32
  deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
33
- deriva_ml/schema/create_schema.py,sha256=IrnSfN0ufS3M31MD8M6ZWyfJidKllLPqDFBUDAIDPY0,12789
33
+ deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
34
34
  deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
35
35
  deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
36
36
  deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
37
- deriva_ml-1.14.42.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
- deriva_ml-1.14.42.dist-info/METADATA,sha256=kwydUUccmWn5SwtxH_HciAqeIyq2w1Pmk_IUo0VQvfc,1122
39
- deriva_ml-1.14.42.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- deriva_ml-1.14.42.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
41
- deriva_ml-1.14.42.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
42
- deriva_ml-1.14.42.dist-info/RECORD,,
37
+ deriva_ml-1.14.44.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
+ deriva_ml-1.14.44.dist-info/METADATA,sha256=Kq-OKAhKOm-JW2HbMoNsZIRWy8oT6v7-CYz4whIqCbo,1168
39
+ deriva_ml-1.14.44.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ deriva_ml-1.14.44.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
41
+ deriva_ml-1.14.44.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
42
+ deriva_ml-1.14.44.dist-info/RECORD,,