deriva-ml 1.14.38__tar.gz → 1.14.40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/PKG-INFO +1 -1
  2. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/base.py +4 -2
  3. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/execution/workflow.py +6 -0
  4. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/install_kernel.py +0 -1
  5. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/model/database.py +1 -1
  6. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/run_notebook.py +1 -0
  7. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml.egg-info/PKG-INFO +1 -1
  8. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/test_download.py +26 -2
  9. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/execution/test_execution.py +41 -1
  10. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/.github/release-drafter.yml +0 -0
  11. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/.github/workflows/publish-docs.yml +0 -0
  12. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/.github/workflows/release.yml +0 -0
  13. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/.gitignore +0 -0
  14. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/LICENSE +0 -0
  15. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/README.md +0 -0
  16. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/.DS_Store +0 -0
  17. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  18. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  19. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  20. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  21. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  22. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  23. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/assets/ERD.png +0 -0
  24. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/assets/Launcher.png +0 -0
  25. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/assets/copy_minid.png +0 -0
  26. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/assets/deriva-logo.png +0 -0
  27. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/assets/deriva-ml.pdf +0 -0
  28. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/assets/sharing-at-home.pdf +0 -0
  29. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/dataset.md +0 -0
  30. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/dataset_aux_classes.md +0 -0
  31. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/dataset_bag.md +0 -0
  32. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/deriva_definitions.md +0 -0
  33. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/deriva_ml_base.md +0 -0
  34. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/deriva_model.md +0 -0
  35. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/execution.md +0 -0
  36. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/execution_configuration.md +0 -0
  37. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/feature.md +0 -0
  38. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/code-docs/upload.md +0 -0
  39. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/index.md +0 -0
  40. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/release-notes.md +0 -0
  41. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/datasets.md +0 -0
  42. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/deriva_ml_structure.md +0 -0
  43. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/execution-configuration.md +0 -0
  44. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/file-assets.md +0 -0
  45. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/identifiers.md +0 -0
  46. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/install.md +0 -0
  47. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/notebooks.md +0 -0
  48. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/docs/user-guide/overview.md +0 -0
  49. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/mkdocs.yml +0 -0
  50. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/pyproject.toml +0 -0
  51. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/setup.cfg +0 -0
  52. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/__init__.py +0 -0
  53. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/bump_version.py +0 -0
  54. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/__init__.py +0 -0
  55. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/constants.py +0 -0
  56. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/definitions.py +0 -0
  57. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/enums.py +0 -0
  58. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/ermrest.py +0 -0
  59. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/exceptions.py +0 -0
  60. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/core/filespec.py +0 -0
  61. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/dataset/__init__.py +0 -0
  62. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/dataset/aux_classes.py +0 -0
  63. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/dataset/dataset.py +0 -0
  64. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/dataset/dataset_bag.py +0 -0
  65. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/dataset/history.py +0 -0
  66. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/dataset/upload.py +0 -0
  67. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/demo_catalog.py +0 -0
  68. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/execution/__init__.py +0 -0
  69. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/execution/environment.py +0 -0
  70. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/execution/execution.py +0 -0
  71. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/execution/execution_configuration.py +0 -0
  72. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/feature.py +0 -0
  73. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/model/__init__.py +0 -0
  74. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/model/catalog.py +0 -0
  75. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/model/sql_mapper.py +0 -0
  76. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/__init__.py +0 -0
  77. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/annotations.py +0 -0
  78. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/check_schema.py +0 -0
  79. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/create_schema.py +0 -0
  80. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  81. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/policy.json +0 -0
  82. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  83. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  84. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  85. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  86. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml.egg-info/requires.txt +0 -0
  87. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/src/deriva_ml.egg-info/top_level.txt +0 -0
  88. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/__init__.py +0 -0
  89. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/conftest.py +0 -0
  90. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/core/__init__.py +0 -0
  91. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/core/test_basic_tables.py +0 -0
  92. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/core/test_file.py +0 -0
  93. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/core/test_vocabulary.py +0 -0
  94. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/__init__.py +0 -0
  95. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/demo-catalog-schema.json +0 -0
  96. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/deriva-ml-reference.json +0 -0
  97. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  98. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/test_dataset_export.py +0 -0
  99. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/test_dataset_version.py +0 -0
  100. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/dataset/test_datasets.py +0 -0
  101. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/execution/__init__.py +0 -0
  102. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/execution/workflow-test.ipynb +0 -0
  103. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/execution/workflow-test.py +0 -0
  104. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/feature/test_features.py +0 -0
  105. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/model/__init__.py +0 -0
  106. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/model/test_database.py +0 -0
  107. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/model/test_models.py +0 -0
  108. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/test-files/execution-parameters.json +0 -0
  109. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/test-files/notebook-parameters.json +0 -0
  110. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/tests/test_utils.py +0 -0
  111. {deriva_ml-1.14.38 → deriva_ml-1.14.40}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.38
3
+ Version: 1.14.40
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -134,8 +134,10 @@ class DerivaML(Dataset):
134
134
  this argument must be provided a value.
135
135
  ml_schema: Schema name for ML schema. Used if you have a non-standard configuration of deriva-ml.
136
136
  project_name: Project name. Defaults to name of domain schema.
137
- cache_dir: Directory path for caching data downloaded from the Deriva server as bdbag.
138
- working_dir: Directory path for storing data used by or generated by any computations.
137
+ cache_dir: Directory path for caching data downloaded from the Deriva server as bdbag. If not provided,
138
+ will default to working_dir.
139
+ working_dir: Directory path for storing data used by or generated by any computations. If no value is
140
+ provided, will default to ${HOME}/deriva_ml
139
141
  use_minid: Use the MINID service when downloading dataset bags.
140
142
  """
141
143
  # Get or use provided credentials for server access
@@ -121,6 +121,7 @@ class Workflow(BaseModel):
121
121
  if "DERIVA_ML_WORKFLOW_URL" in os.environ:
122
122
  self.url = os.environ["DERIVA_ML_WORKFLOW_URL"]
123
123
  self.checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
124
+ self.git_root = Workflow._get_git_root(Path(os.environ["DERIVA_ML_NOTEBOOK_PATH"]))
124
125
  self.is_notebook = True
125
126
 
126
127
  if not self.url:
@@ -320,6 +321,11 @@ class Workflow(BaseModel):
320
321
  if not (filename.exists() or Workflow._in_repl()):
321
322
  # Being called from the command line interpreter.
322
323
  filename = Path.cwd() / Path("REPL")
324
+ # Get the caller's filename, which is two up the stack from here.
325
+ elif "PYTEST_CURRENT_TEST" in os.environ:
326
+ filename = Path.cwd() / Path("pytest")
327
+ else:
328
+ raise DerivaMLException("Looking for caller failed") # Stack is too shallow
323
329
  return filename, is_notebook
324
330
 
325
331
  @staticmethod
@@ -1,4 +1,3 @@
1
- # your_pkg/install_kernel.py
2
1
  import re
3
2
  import sys
4
3
  from argparse import ArgumentParser
@@ -326,7 +326,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
326
326
  except ValueError:
327
327
  tname = table
328
328
  for sname in [self.domain_schema, self.ml_schema, "WWW"]: # Be careful of File table.
329
- if table in self.model.schemas[sname].tables:
329
+ if sname in self.model.schemas and table in self.model.schemas[sname].tables:
330
330
  break
331
331
  try:
332
332
  _ = self.model.schemas[sname].tables[tname]
@@ -119,6 +119,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
119
119
  url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
120
120
  os.environ["DERIVA_ML_WORKFLOW_URL"] = url
121
121
  os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
122
+ os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file
122
123
 
123
124
  with tempfile.TemporaryDirectory() as tmpdirname:
124
125
  notebook_output = Path(tmpdirname) / Path(notebook_file).name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.38
3
+ Version: 1.14.40
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -1,7 +1,7 @@
1
1
  from pathlib import Path
2
2
 
3
3
  # Local imports
4
- from deriva_ml import DatasetSpec, DerivaML, MLVocab
4
+ from deriva_ml import DatasetSpec, DerivaML, MLVocab, TableDefinition, VersionPart
5
5
  from deriva_ml.demo_catalog import DatasetDescription
6
6
  from tests.test_utils import MLDatasetCatalog
7
7
 
@@ -98,7 +98,6 @@ class TestDatasetDownload:
98
98
 
99
99
  self.compare_datasets(ml_instance, dataset_test, dataset_spec)
100
100
 
101
-
102
101
  def test_dataset_download_recurse(self, dataset_test, tmp_path):
103
102
  hostname = dataset_test.catalog.hostname
104
103
  catalog_id = dataset_test.catalog.catalog_id
@@ -147,3 +146,28 @@ class TestDatasetDownload:
147
146
 
148
147
  self.compare_datasets(ml_instance, dataset_test, current_spec)
149
148
  self.compare_datasets(ml_instance, dataset_test, new_spec)
149
+
150
+ def test_dataset_download_schemas(self, dataset_test, tmp_path):
151
+ hostname = dataset_test.catalog.hostname
152
+ catalog_id = dataset_test.catalog.catalog_id
153
+ ml_instance = DerivaML(hostname, catalog_id, working_dir=tmp_path, use_minid=False)
154
+ dataset_description = dataset_test.dataset_description
155
+
156
+ current_version = ml_instance.dataset_version(dataset_description.rid)
157
+ current_spec = DatasetSpec(rid=dataset_description.rid, version=current_version)
158
+ ml_instance.create_table(
159
+ TableDefinition(
160
+ name="NewTable",
161
+ column_defs=[],
162
+ )
163
+ )
164
+ new_version = ml_instance.increment_dataset_version(
165
+ dataset_rid=dataset_description.rid, component=VersionPart.minor
166
+ )
167
+ new_spec = DatasetSpec(rid=dataset_description.rid, version=new_version)
168
+
169
+ current_bag = ml_instance.download_dataset_bag(current_spec)
170
+ new_bag = ml_instance.download_dataset_bag(new_spec)
171
+
172
+ assert "NewTable" in new_bag.model.schemas[ml_instance.domain_schema].tables
173
+ assert "NewTable" not in current_bag.model.schemas[ml_instance.domain_schema].tables
@@ -7,6 +7,8 @@ from pathlib import Path
7
7
  from tempfile import TemporaryDirectory
8
8
 
9
9
  from deriva_ml import (
10
+ BuiltinTypes,
11
+ ColumnDefinition,
10
12
  DatasetSpec,
11
13
  DerivaML,
12
14
  ExecAssetType,
@@ -47,7 +49,7 @@ class TestWorkflow:
47
49
  workflow_url = workflows[0]["URL"]
48
50
 
49
51
  workflow_rid = ml_instance.lookup_workflow(workflow_url)
50
-
52
+ print(f"Workflow url: {workflow_url}")
51
53
  assert workflow_url.endswith("workflow-test.py")
52
54
 
53
55
  # Make sure that workflow is not duplicated if created again.
@@ -186,10 +188,48 @@ class TestExecution:
186
188
  # Create manual execution
187
189
  test_execution = ml_instance.create_execution(config)
188
190
  with test_execution.execute() as execution:
191
+ assert execution.asset_paths["Execution_Asset"][0].asset_types[0] == "Model_File"
189
192
  assert 1 == len(execution.asset_paths)
190
193
  assert 1 == len(execution.datasets)
191
194
  assert execution.datasets[0].dataset_rid == dataset_rid
192
195
 
196
+ def test_download_asset(self, test_ml, tmp_path):
197
+ ml_instance = test_ml
198
+
199
+ # Create a workflow
200
+ ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
201
+ ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
202
+ api_workflow = ml_instance.create_workflow(
203
+ name="Test Workflow One",
204
+ workflow_type="Test Workflow",
205
+ description="A test operation",
206
+ )
207
+ ml_instance.create_asset(
208
+ "BarAsset",
209
+ column_defs=[ColumnDefinition(name="foo", type=BuiltinTypes.int4)],
210
+ )
211
+ manual_execution = ml_instance.create_execution(
212
+ ExecutionConfiguration(description="Sample Execution", workflow=api_workflow)
213
+ )
214
+
215
+ with manual_execution.execute() as execution:
216
+ model_file = execution.asset_file_path(
217
+ "BarAsset", "API_Model/modelfile.txt", asset_types=ExecAssetType.model_file, foo=23
218
+ )
219
+ print(model_file)
220
+ with model_file.open("w") as fp:
221
+ fp.write("My model")
222
+ # Now upload the file and retrieve the RID of the new asset from the returned results.
223
+ uploaded_assets = manual_execution.upload_execution_outputs()
224
+ print(uploaded_assets)
225
+ assert 1 == len(uploaded_assets["deriva-ml/Execution_Asset"])
226
+
227
+ file = manual_execution.download_asset(asset_rid, tmpdir, update_catalog=False)
228
+ assert file.name == "modelfile.txt"
229
+
230
+ assert "BarAsset" in [a.name for a in ml_instance.model.find_assets()]
231
+ assert ml_instance.model.asset_metadata("BarAsset") == {"foo"}
232
+
193
233
  @staticmethod
194
234
  def create_execution_asset(ml_instance: DerivaML, api_workflow):
195
235
  manual_execution = ml_instance.create_execution(
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes