deriva-ml 1.12.0__tar.gz → 1.12.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {deriva_ml-1.12.0/src/deriva_ml.egg-info → deriva_ml-1.12.2}/PKG-INFO +1 -1
  2. deriva_ml-1.12.2/docs/user-guide/file-assets.md +3 -0
  3. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/deriva_ml_base.py +1 -5
  4. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/execution.py +2 -1
  5. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/upload.py +13 -13
  6. {deriva_ml-1.12.0 → deriva_ml-1.12.2/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  7. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml.egg-info/SOURCES.txt +1 -0
  8. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/.github/workflows/publish-docs.yml +0 -0
  9. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/.gitignore +0 -0
  10. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/LICENSE +0 -0
  11. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/README.md +0 -0
  12. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/.DS_Store +0 -0
  13. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  14. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  15. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  16. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  17. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  18. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/assets/ERD.png +0 -0
  19. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/assets/Launcher.png +0 -0
  20. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/assets/copy_minid.png +0 -0
  21. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/assets/deriva-logo.png +0 -0
  22. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/assets/deriva-ml.pdf +0 -0
  23. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/assets/sharing-at-home.pdf +0 -0
  24. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/dataset.md +0 -0
  25. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/dataset_aux_classes.md +0 -0
  26. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/dataset_bag.md +0 -0
  27. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/deriva_ml_base.md +0 -0
  28. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/deriva_model.md +0 -0
  29. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/execution.md +0 -0
  30. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/execution_configuration.md +0 -0
  31. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/feature.md +0 -0
  32. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/code-docs/upload.md +0 -0
  33. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/deriva_ml_structure.md +0 -0
  34. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/index.md +0 -0
  35. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/release-notes.md +0 -0
  36. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/user-guide/datasets.md +0 -0
  37. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/user-guide/execution-configuration.md +0 -0
  38. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/user-guide/identifiers.md +0 -0
  39. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/user-guide/install.md +0 -0
  40. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/docs/user-guide/ml_workflow_instruction.md +0 -0
  41. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/mkdocs.yml +0 -0
  42. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/pyproject.toml +0 -0
  43. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/release.sh +0 -0
  44. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/setup.cfg +0 -0
  45. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/__init__.py +0 -0
  46. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/database_model.py +0 -0
  47. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/dataset.py +0 -0
  48. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/dataset_aux_classes.py +0 -0
  49. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/dataset_bag.py +0 -0
  50. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/demo_catalog.py +0 -0
  51. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/deriva_definitions.py +0 -0
  52. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/deriva_model.py +0 -0
  53. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/execution_configuration.py +0 -0
  54. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/execution_environment.py +0 -0
  55. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/feature.py +0 -0
  56. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/history.py +0 -0
  57. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/schema_setup/__init__.py +0 -0
  58. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/schema_setup/annotations.py +0 -0
  59. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/schema_setup/create_schema.py +0 -0
  60. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/schema_setup/policy.json +0 -0
  61. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/schema_setup/table_comments_utils.py +0 -0
  62. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml/test_functions.py +0 -0
  63. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  64. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  65. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml.egg-info/requires.txt +0 -0
  66. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/src/deriva_ml.egg-info/top_level.txt +0 -0
  67. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/__init__.py +0 -0
  68. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/derivaml_test.py +0 -0
  69. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/runner.py +0 -0
  70. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/test_basic_tables.py +0 -0
  71. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/test_dataset.py +0 -0
  72. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/test_download.py +0 -0
  73. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/test_execution.py +0 -0
  74. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/test_features.py +0 -0
  75. {deriva_ml-1.12.0 → deriva_ml-1.12.2}/tests/test_upload.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.12.0
3
+ Version: 1.12.2
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -0,0 +1,3 @@
1
+ # File Assets
2
+
3
+
@@ -193,11 +193,7 @@ class DerivaML(Dataset):
193
193
  pass
194
194
 
195
195
  def _check_nbstrip_status(self) -> None:
196
- """Figure out if you are running in a Jupyter notebook
197
-
198
- Returns:
199
- A Path to the notebook file that is currently being executed.
200
- """
196
+ """Check to see if nbstrip is installed"""
201
197
  try:
202
198
  if subprocess.run(
203
199
  ["nbstripout", "--is-installed"],
@@ -651,6 +651,7 @@ class Execution:
651
651
  ] # Peel off the schema from the asset table
652
652
  asset_exe = self._model.find_association(asset_table_name, "Execution")
653
653
  asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
654
+
654
655
  asset_exe_path.insert(
655
656
  [
656
657
  {
@@ -706,7 +707,7 @@ class Execution:
706
707
  """Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
707
708
 
708
709
  Given the name of an asset table, and a file name, register the file for upload, and return a path to that
709
- file in the upload directory. In addition to the filename, additioal asset metadata and file asset types may
710
+ file in the upload directory. In addition to the filename, additional asset metadata and file asset types may
710
711
  be specified.
711
712
 
712
713
  This routine has three modes, depending on if file_name refers to an existing file. If it doesn't, a path
@@ -74,17 +74,16 @@ feature_table_dir_regex = (
74
74
  + r"/(?P<schema>[-\w]+)/(?P<target_table>[-\w]+)/(?P<feature_name>[-\w]+)"
75
75
  )
76
76
  feature_value_regex = (
77
- feature_table_dir_regex + r"/(?P=feature_name)[.](?P<file_ext>[(csv|json)]*)$"
77
+ feature_table_dir_regex + r"/(?P=feature_name)[.](?P<ext>[(csv|json)]*)$"
78
78
  )
79
79
  feature_asset_dir_regex = feature_table_dir_regex + r"/asset/(?P<asset_table>[-\w]+)"
80
80
  feature_asset_regex = (
81
- feature_asset_dir_regex
82
- + r"/(?P<file_name>[A-Za-z0-9_-]+)[.](?P<file_ext>[a-z0-9]*)$"
81
+ feature_asset_dir_regex + r"/(?P<file>[A-Za-z0-9_-]+)[.](?P<ext>[a-z0-9]*)$"
83
82
  )
84
83
 
85
84
  asset_path_regex = exec_dir_regex + r"/asset/(?P<schema>[-\w]+)/(?P<asset_table>[-\w]*)"
86
85
 
87
- asset_file_regex = r"(?P<file_name>[-\w]+)[.](?P<file_ext>[a-z0-9]*)$"
86
+ asset_file_regex = r"(?P<file>[-\w]+)[.](?P<ext>[a-z0-9]*)$"
88
87
 
89
88
  table_regex = (
90
89
  exec_dir_regex
@@ -211,15 +210,16 @@ def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
211
210
  "Filename": "{file_name}",
212
211
  }
213
212
  | {c: f"{{{c}}}" for c in metadata_columns},
214
- "file_pattern": asset_path, # Sets schema, asset_table, file_name, file_ext
213
+ "file_pattern": asset_path, # Sets schema, asset_table, file
214
+ "asset_type": "file",
215
215
  "target_table": [schema, asset_table.name],
216
216
  "checksum_types": ["sha256", "md5"],
217
217
  "hatrac_options": {"versioned_urls": True},
218
218
  "hatrac_templates": {
219
- "hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}.{{file_ext}}",
220
- "content-disposition": "filename*=UTF-8''{file_name}.{file_ext}",
219
+ "hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}",
220
+ "content-disposition": "filename*=UTF-8''{file_name}",
221
221
  },
222
- "record_query_template": "/entity/{target_table}/MD5={{md5}}&Filename={file_name}.{file_ext}",
222
+ "record_query_template": "/entity/{target_table}/MD5={md5}&Filename={file_name}",
223
223
  }
224
224
 
225
225
 
@@ -244,18 +244,18 @@ def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
244
244
  "Length": "{file_size}",
245
245
  "Filename": "{file_name}",
246
246
  },
247
- "asset_type": "fetch",
247
+ "asset_type": "file",
248
248
  "target_table": ["{schema}", "{asset_table}"],
249
249
  "file_pattern": asset_path_regex
250
250
  + "/"
251
- + asset_file_regex, # Sets schema, asset_table, file_name, file_ext
251
+ + asset_file_regex, # Sets schema, asset_table, name, ext
252
252
  "checksum_types": ["sha256", "md5"],
253
253
  "hatrac_options": {"versioned_urls": True},
254
254
  "hatrac_templates": {
255
- "hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}.{file_ext}",
256
- "content-disposition": "filename*=UTF-8''{file_name}.{file_ext}",
255
+ "hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}",
256
+ "content-disposition": "filename*=UTF-8''{file_name}",
257
257
  },
258
- "record_query_template": "/entity/{target_table}/MD5={md5}&Filename={file_name}.{file_ext}",
258
+ "record_query_template": "/entity/{target_table}/MD5={md5}&Filename={file_name}",
259
259
  },
260
260
  # {
261
261
  # Upload the records into a table
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.12.0
3
+ Version: 1.12.2
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -31,6 +31,7 @@ docs/code-docs/feature.md
31
31
  docs/code-docs/upload.md
32
32
  docs/user-guide/datasets.md
33
33
  docs/user-guide/execution-configuration.md
34
+ docs/user-guide/file-assets.md
34
35
  docs/user-guide/identifiers.md
35
36
  docs/user-guide/install.md
36
37
  docs/user-guide/ml_workflow_instruction.md
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes