deriva-ml 1.14.41__py3-none-any.whl → 1.14.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -296,6 +296,7 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, Fil
296
296
 
297
297
  with spec_file.open("w+") as cfile:
298
298
  json.dump(bulk_upload_configuration(model), cfile)
299
+ print("Creating GenericUploader...")
299
300
  uploader = GenericUploader(
300
301
  server={
301
302
  "host": model.hostname,
@@ -306,7 +307,7 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, Fil
306
307
  )
307
308
  try:
308
309
  uploader.getUpdatedConfig()
309
- uploader.scanDirectory(directory)
310
+ uploader.scanDirectory(directory, purge_state=True)
310
311
  results = {
311
312
  path: FileUploadState(
312
313
  state=UploadState(result["State"]),
@@ -317,6 +318,7 @@ def upload_directory(model: DerivaModel, directory: Path | str) -> dict[Any, Fil
317
318
  }
318
319
  finally:
319
320
  uploader.cleanup()
321
+ print(f"Cleanup called...{uploader}")
320
322
  return results
321
323
 
322
324
 
@@ -236,6 +236,7 @@ class Execution:
236
236
  Raises:
237
237
  DerivaMLException: If initialization fails or configuration is invalid.
238
238
  """
239
+
239
240
  self.asset_paths: list[AssetFilePath] = []
240
241
  self.configuration = configuration
241
242
  self._ml_object = ml_object
@@ -378,6 +379,9 @@ class Execution:
378
379
  # save runtime env
379
380
  self._save_runtime_environment()
380
381
 
382
+ # Now upload the files so we have the info in case the execution fails.
383
+ self.uploaded_assets = self._upload_execution_dirs()
384
+
381
385
  self.start_time = datetime.now()
382
386
  self.update_status(Status.pending, "Initialize status finished.")
383
387
 
@@ -886,10 +890,10 @@ class Execution:
886
890
 
887
891
  file_name = Path(file_name)
888
892
  asset_path = asset_file_path(
889
- self._working_dir,
890
- self.execution_rid,
891
- self._model.name_to_table(asset_name),
892
- file_name.name,
893
+ prefix=self._working_dir,
894
+ exec_rid=self.execution_rid,
895
+ asset_table=self._model.name_to_table(asset_name),
896
+ file_name=file_name.name,
893
897
  metadata=kwargs,
894
898
  )
895
899
 
@@ -121,7 +121,7 @@ class Workflow(BaseModel):
121
121
  if "DERIVA_ML_WORKFLOW_URL" in os.environ:
122
122
  self.url = os.environ["DERIVA_ML_WORKFLOW_URL"]
123
123
  self.checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
124
- self.git_root = Workflow._get_git_root(Path(os.environ["DERIVA_ML_NOTEBOOK_PATHL"]))
124
+ self.git_root = Workflow._get_git_root(Path(os.environ["DERIVA_ML_NOTEBOOK_PATH"]))
125
125
  self.is_notebook = True
126
126
 
127
127
  if not self.url:
@@ -318,14 +318,12 @@ class Workflow(BaseModel):
318
318
  ]
319
319
  # Get the caller's filename, which is two up the stack from here.
320
320
  filename = Path(stack[-1])
321
- if not (filename.exists() or Workflow._in_repl()):
321
+ if not (filename.exists()) or Workflow._in_repl():
322
322
  # Being called from the command line interpreter.
323
323
  filename = Path.cwd() / Path("REPL")
324
324
  # Get the caller's filename, which is two up the stack from here.
325
- elif "PYTEST_CURRENT_TEST" in os.environ:
325
+ elif (not filename.exists()) and "PYTEST_CURRENT_TEST" in os.environ:
326
326
  filename = Path.cwd() / Path("pytest")
327
- else:
328
- raise DerivaMLException("Looking for caller failed") # Stack is too shallow
329
327
  return filename, is_notebook
330
328
 
331
329
  @staticmethod
deriva_ml/run_notebook.py CHANGED
@@ -6,11 +6,13 @@ import tempfile
6
6
  from datetime import datetime
7
7
  from pathlib import Path
8
8
 
9
+ import nbformat
9
10
  import papermill as pm
10
11
  import regex as re
11
12
  from deriva.core import BaseCLI
13
+ from nbconvert import HTMLExporter
12
14
 
13
- from deriva_ml import DerivaML, ExecAssetType, MLAsset, Workflow
15
+ from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
14
16
 
15
17
 
16
18
  class DerivaMLRunNotebookCLI(BaseCLI):
@@ -120,8 +122,8 @@ class DerivaMLRunNotebookCLI(BaseCLI):
120
122
  os.environ["DERIVA_ML_WORKFLOW_URL"] = url
121
123
  os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
122
124
  os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
123
-
124
125
  with tempfile.TemporaryDirectory() as tmpdirname:
126
+ print(f"Running notebook {notebook_file.name} with parameters:")
125
127
  notebook_output = Path(tmpdirname) / Path(notebook_file).name
126
128
  pm.execute_notebook(
127
129
  input_path=notebook_file,
@@ -130,6 +132,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
130
132
  kernel_name=kernel,
131
133
  log_output=log,
132
134
  )
135
+ print(f"Notebook output saved to {notebook_output}")
133
136
  catalog_id = execution_rid = None
134
137
  with Path(notebook_output).open("r") as f:
135
138
  for line in f:
@@ -143,25 +146,51 @@ class DerivaMLRunNotebookCLI(BaseCLI):
143
146
  if not execution_rid:
144
147
  print("Execution RID not found.")
145
148
  exit(1)
146
- print("Uploaded notebook output for Execution RID:", execution_rid)
147
149
 
148
- ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id)
150
+ ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
151
+ workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
152
+
153
+ execution = Execution(
154
+ configuration=ExecutionConfiguration(workflow=workflow_rid),
155
+ ml_object=ml_instance,
156
+ reload=execution_rid,
157
+ )
158
+
159
+ # Generate an HTML version of the output notebook.
160
+ nb = nbformat.read(notebook_output, as_version=4)
161
+ html_exporter = HTMLExporter(template_name="classic")
162
+ body, resources = html_exporter.from_notebook_node(nb)
163
+ notebook_output_html = notebook_output.with_suffix(".html")
164
+ notebook_output_html.write_text(body, encoding="utf-8")
149
165
 
150
- execution = ml_instance.restore_execution(execution_rid)
151
166
  execution.asset_file_path(
152
167
  asset_name=MLAsset.execution_asset,
153
168
  file_name=notebook_output,
154
169
  asset_types=ExecAssetType.notebook_output,
155
170
  )
171
+
172
+ execution.asset_file_path(
173
+ asset_name=MLAsset.execution_asset,
174
+ file_name=notebook_output_html,
175
+ asset_types=ExecAssetType.notebook_output,
176
+ )
177
+ execution.asset_file_path(
178
+ asset_name=MLAsset.execution_asset,
179
+ file_name=notebook_output_html,
180
+ asset_types=ExecAssetType.notebook_output,
181
+ )
182
+ print("parameter....")
183
+
156
184
  parameter_file = execution.asset_file_path(
157
185
  asset_name=MLAsset.execution_asset,
158
186
  file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
159
187
  asset_types=ExecAssetType.input_file.value,
160
188
  )
189
+
161
190
  with Path(parameter_file).open("w") as f:
162
191
  json.dump(parameters, f)
163
-
164
192
  execution.upload_execution_outputs()
193
+
165
194
  print(ml_instance.cite(execution_rid))
166
195
 
167
196
 
@@ -178,4 +207,8 @@ def main():
178
207
 
179
208
 
180
209
  if __name__ == "__main__":
181
- main()
210
+ try:
211
+ main()
212
+ except Exception as e:
213
+ print(e)
214
+ exit(1)
@@ -309,7 +309,12 @@ def initialize_ml_schema(model: Model, schema_name: str = "deriva-ml"):
309
309
  "Description": "A file generated by an execution",
310
310
  },
311
311
  {"Name": "File", "Description": "A file that is not managed by Hatrac"},
312
+ {"Name": "Input_File", "Description": "A file input to an execution."},
312
313
  {"Name": "Model_File", "Description": "The ML model."},
314
+ {
315
+ "Name": "Notebook_Output",
316
+ "Description": "A Jupyter notebook with output cells filled from an execution.",
317
+ },
313
318
  ],
314
319
  defaults={"ID", "URI"},
315
320
  )
@@ -1,14 +1,16 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.41
3
+ Version: 1.14.43
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: bump-my-version
10
+ Requires-Dist: bdbag
10
11
  Requires-Dist: deriva~=1.7.10
11
12
  Requires-Dist: deepdiff
13
+ Requires-Dist: nbconvert
12
14
  Requires-Dist: pandas
13
15
  Requires-Dist: regex~=2024.7.24
14
16
  Requires-Dist: pydantic>=2.11
@@ -3,7 +3,7 @@ deriva_ml/bump_version.py,sha256=KpHmkpEztly2QHYL4dyaIGdEMyP4F0D89rawyh5EDTs,398
3
3
  deriva_ml/demo_catalog.py,sha256=JjPAIac_hKPh5krEhGJydjXquRnivi7kQoR8W4Khp-s,14928
4
4
  deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
5
5
  deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
6
- deriva_ml/run_notebook.py,sha256=QDeYKv8Ug9lx36e-CMogDu1Y4omOFlkTXvMxLriulJQ,6604
6
+ deriva_ml/run_notebook.py,sha256=2pGv6dNTH0TuCz7E6GBS4MMzF5IyH-Tbv1i46rsJLLQ,7923
7
7
  deriva_ml/core/__init__.py,sha256=V_i90pc5PB1F4UdOO6DZWzpEFaZDTaPRU-EzKXQ19eI,787
8
8
  deriva_ml/core/base.py,sha256=5H0GEMlKXUVRXBYXJQ25kxdscB6uX1wIKSUAM_CBOhI,61228
9
9
  deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
@@ -17,12 +17,12 @@ deriva_ml/dataset/aux_classes.py,sha256=9mZAln7_rrzaRbKhKA6dJOp3xeD6dHOC9NXOtJKR
17
17
  deriva_ml/dataset/dataset.py,sha256=drL0YZovPpQC9G_eYe-fWuFQFysK2X-ad0vL4ZsYkf0,64396
18
18
  deriva_ml/dataset/dataset_bag.py,sha256=mPIZRX5aTbVRcJbCFtdkmlnexquF8NE-onbVK_8IxVk,14224
19
19
  deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
20
- deriva_ml/dataset/upload.py,sha256=Ad5JDfGvkIvefE-plP8SN9pNAxHzYrBoid5isz_bnNs,16411
20
+ deriva_ml/dataset/upload.py,sha256=7C9v4CmH3OGjBKdPl-OPvygseuAEtDjy3atSwmhgGBs,16524
21
21
  deriva_ml/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
23
- deriva_ml/execution/execution.py,sha256=NJT4qzZvvBXAlh73NVM39VE-uinSVBu2mHtuZD35G1M,44591
23
+ deriva_ml/execution/execution.py,sha256=JH2alK-7DoRzfGvjsrNhD_tZqoZkb736bxbbgXDP33o,44780
24
24
  deriva_ml/execution/execution_configuration.py,sha256=Rw4VWkBCZN9yatvSKdTqEWTfu470lpcVKfHFR0uN0jI,6248
25
- deriva_ml/execution/workflow.py,sha256=qMH3PlJRy6W5r8uKveZm8pbS5UEEWdUvZ1xyyQjdAOA,13688
25
+ deriva_ml/execution/workflow.py,sha256=7CwPrgs3FKQHiEVus0PpK9w5hVKLKZnCrlu_nT8GFe8,13604
26
26
  deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  deriva_ml/model/catalog.py,sha256=dzTBcRlqgEVkPY32AUax_iu75RgFiT4Pu5au7rmrv8k,14068
28
28
  deriva_ml/model/database.py,sha256=SBkYFf0qwbGmvL0Xtn_n5DCz4roGfrhuYrM8G69Cy9Y,14837
@@ -30,13 +30,13 @@ deriva_ml/model/sql_mapper.py,sha256=_0QsJEVSgSPtxrWKSgjfPZCQ1aMVcjR_Tk2OxLhWEvY
30
30
  deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
31
31
  deriva_ml/schema/annotations.py,sha256=TuQ3vWFnK0160fRmtvsCkHx9qAcRa63MSyERB4x5a98,18197
32
32
  deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
33
- deriva_ml/schema/create_schema.py,sha256=IrnSfN0ufS3M31MD8M6ZWyfJidKllLPqDFBUDAIDPY0,12789
33
+ deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
34
34
  deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
35
35
  deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
36
36
  deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
37
- deriva_ml-1.14.41.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
- deriva_ml-1.14.41.dist-info/METADATA,sha256=WTixw85aw7Qz8eFwBUcCCnf3WtjrLqng9XTc9S5ToZg,1122
39
- deriva_ml-1.14.41.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- deriva_ml-1.14.41.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
41
- deriva_ml-1.14.41.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
42
- deriva_ml-1.14.41.dist-info/RECORD,,
37
+ deriva_ml-1.14.43.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
+ deriva_ml-1.14.43.dist-info/METADATA,sha256=ktO6NAE5LHzYsCkS-CEK2OELB_Wqc97_3X7rhjLgrfY,1168
39
+ deriva_ml-1.14.43.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ deriva_ml-1.14.43.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
41
+ deriva_ml-1.14.43.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
42
+ deriva_ml-1.14.43.dist-info/RECORD,,