deriva-ml 1.14.29__tar.gz → 1.14.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110):
  1. {deriva_ml-1.14.29/src/deriva_ml.egg-info → deriva_ml-1.14.31}/PKG-INFO +1 -1
  2. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/base.py +14 -7
  3. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/execution/workflow.py +17 -32
  4. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/run_notebook.py +12 -19
  5. {deriva_ml-1.14.29 → deriva_ml-1.14.31/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  6. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/execution/test_execution.py +49 -28
  7. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/execution/workflow-test.ipynb +32 -27
  8. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/execution/workflow-test.py +2 -1
  9. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/.github/release-drafter.yml +0 -0
  10. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/.github/workflows/publish-docs.yml +0 -0
  11. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/.github/workflows/release.yml +0 -0
  12. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/.gitignore +0 -0
  13. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/LICENSE +0 -0
  14. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/README.md +0 -0
  15. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/.DS_Store +0 -0
  16. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  17. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  18. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  19. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  20. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  21. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  22. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/assets/ERD.png +0 -0
  23. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/assets/Launcher.png +0 -0
  24. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/assets/copy_minid.png +0 -0
  25. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/assets/deriva-logo.png +0 -0
  26. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/assets/deriva-ml.pdf +0 -0
  27. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/assets/sharing-at-home.pdf +0 -0
  28. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/dataset.md +0 -0
  29. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/dataset_aux_classes.md +0 -0
  30. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/dataset_bag.md +0 -0
  31. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/deriva_definitions.md +0 -0
  32. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/deriva_ml_base.md +0 -0
  33. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/deriva_model.md +0 -0
  34. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/execution.md +0 -0
  35. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/execution_configuration.md +0 -0
  36. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/feature.md +0 -0
  37. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/code-docs/upload.md +0 -0
  38. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/index.md +0 -0
  39. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/release-notes.md +0 -0
  40. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/datasets.md +0 -0
  41. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/deriva_ml_structure.md +0 -0
  42. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/execution-configuration.md +0 -0
  43. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/file-assets.md +0 -0
  44. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/identifiers.md +0 -0
  45. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/install.md +0 -0
  46. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/notebooks.md +0 -0
  47. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/docs/user-guide/overview.md +0 -0
  48. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/mkdocs.yml +0 -0
  49. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/pyproject.toml +0 -0
  50. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/release.sh +0 -0
  51. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/setup.cfg +0 -0
  52. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/__init__.py +0 -0
  53. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/__init__.py +0 -0
  54. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/constants.py +0 -0
  55. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/definitions.py +0 -0
  56. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/enums.py +0 -0
  57. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/ermrest.py +0 -0
  58. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/exceptions.py +0 -0
  59. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/core/filespec.py +0 -0
  60. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/dataset/__init__.py +0 -0
  61. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/dataset/aux_classes.py +0 -0
  62. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/dataset/dataset.py +0 -0
  63. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/dataset/dataset_bag.py +0 -0
  64. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/dataset/history.py +0 -0
  65. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/dataset/upload.py +0 -0
  66. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/demo_catalog.py +0 -0
  67. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/execution/__init__.py +0 -0
  68. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/execution/environment.py +0 -0
  69. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/execution/execution.py +0 -0
  70. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/execution/execution_configuration.py +0 -0
  71. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/feature.py +0 -0
  72. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/model/__init__.py +0 -0
  73. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/model/catalog.py +0 -0
  74. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/model/database.py +0 -0
  75. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/model/sql_mapper.py +0 -0
  76. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/__init__.py +0 -0
  77. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/annotations.py +0 -0
  78. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/check_schema.py +0 -0
  79. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/create_schema.py +0 -0
  80. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  81. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/policy.json +0 -0
  82. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  83. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  84. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  85. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  86. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml.egg-info/requires.txt +0 -0
  87. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/src/deriva_ml.egg-info/top_level.txt +0 -0
  88. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/__init__.py +0 -0
  89. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/conftest.py +0 -0
  90. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/core/__init__.py +0 -0
  91. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/core/test_basic_tables.py +0 -0
  92. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/core/test_file.py +0 -0
  93. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/core/test_vocabulary.py +0 -0
  94. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/__init__.py +0 -0
  95. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/demo-catalog-schema.json +0 -0
  96. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/deriva-ml-reference.json +0 -0
  97. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  98. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/test_dataset_export.py +0 -0
  99. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/test_dataset_version.py +0 -0
  100. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/test_datasets.py +0 -0
  101. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/dataset/test_download.py +0 -0
  102. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/execution/__init__.py +0 -0
  103. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/feature/test_features.py +0 -0
  104. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/model/__init__.py +0 -0
  105. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/model/test_database.py +0 -0
  106. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/model/test_models.py +0 -0
  107. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/test-files/execution-parameters.json +0 -0
  108. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/test-files/notebook-parameters.json +0 -0
  109. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/tests/test_utils.py +0 -0
  110. {deriva_ml-1.14.29 → deriva_ml-1.14.31}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.29
3
+ Version: 1.14.31
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -1305,7 +1305,9 @@ class DerivaML(Dataset):
1305
1305
  def add_workflow(self, workflow: Workflow) -> RID:
1306
1306
  """Adds a workflow to the catalog.
1307
1307
 
1308
- Registers a new workflow in the catalog or returns the RID of an existing workflow with the same URL.
1308
+ Registers a new workflow in the catalog or returns the RID of an existing workflow with the same
1309
+ URL or checksum.
1310
+
1309
1311
  Each workflow represents a specific computational process or analysis pipeline.
1310
1312
 
1311
1313
  Args:
@@ -1328,11 +1330,12 @@ class DerivaML(Dataset):
1328
1330
  >>> workflow_rid = ml.add_workflow(workflow)
1329
1331
  """
1330
1332
  # Check if a workflow already exists by URL
1331
- if workflow_rid := self.lookup_workflow(workflow.url):
1333
+ if workflow_rid := self.lookup_workflow(workflow.checksum or workflow.url):
1332
1334
  return workflow_rid
1333
1335
 
1334
1336
  # Get an ML schema path for the workflow table
1335
1337
  ml_schema_path = self.pathBuilder.schemas[self.ml_schema]
1338
+
1336
1339
  try:
1337
1340
  # Create a workflow record
1338
1341
  workflow_record = {
@@ -1350,12 +1353,11 @@ class DerivaML(Dataset):
1350
1353
  raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
1351
1354
  return workflow_rid
1352
1355
 
1353
- def lookup_workflow(self, url: str) -> RID | None:
1356
+ def lookup_workflow(self, url_or_checksum: str) -> RID | None:
1354
1357
  """Finds a workflow by URL.
1355
1358
 
1356
1359
  Args:
1357
- url: URL of the workflow to find.
1358
-
1360
+ url_or_checksum: URL or checksum of the workflow.
1359
1361
  Returns:
1360
1362
  RID: Resource Identifier of the workflow if found, None otherwise.
1361
1363
 
@@ -1369,7 +1371,12 @@ class DerivaML(Dataset):
1369
1371
  try:
1370
1372
  # Search for workflow by URL
1371
1373
  url_column = workflow_path.URL
1372
- return list(workflow_path.filter(url_column == url).entities())[0]["RID"]
1374
+ checksum_column = workflow_path.Checksum
1375
+ return list(
1376
+ workflow_path.path.filter(
1377
+ (url_column == url_or_checksum) | (checksum_column == url_or_checksum)
1378
+ ).entities()
1379
+ )[0]["RID"]
1373
1380
  except IndexError:
1374
1381
  return None
1375
1382
 
@@ -1403,7 +1410,7 @@ class DerivaML(Dataset):
1403
1410
  self.lookup_term(MLVocab.workflow_type, workflow_type)
1404
1411
 
1405
1412
  # Create and return a new workflow object
1406
- return Workflow.create_workflow(name, workflow_type, description)
1413
+ return Workflow(name=name, workflow_type=workflow_type, description=description)
1407
1414
 
1408
1415
  def create_execution(self, configuration: ExecutionConfiguration, dry_run: bool = False) -> "Execution":
1409
1416
  """Creates an execution environment.
@@ -7,10 +7,7 @@ from pathlib import Path
7
7
  from typing import Any
8
8
 
9
9
  import requests
10
- from pydantic import (
11
- BaseModel,
12
- PrivateAttr,
13
- )
10
+ from pydantic import BaseModel, PrivateAttr, model_validator
14
11
  from requests import RequestException
15
12
 
16
13
  from deriva_ml.core.definitions import RID
@@ -80,26 +77,18 @@ class Workflow(BaseModel):
80
77
  """
81
78
 
82
79
  name: str
83
- url: str
84
80
  workflow_type: str
85
- version: str | None = None
86
81
  description: str | None = None
82
+ url: str | None = None
83
+ version: str | None = None
87
84
  rid: RID | None = None
88
85
  checksum: str | None = None
89
86
  is_notebook: bool = False
90
87
 
91
- _logger: Any = PrivateAttr()
92
-
93
- def __post_init__(self):
94
- """Initializes logging for the workflow."""
95
- self._logger = logging.getLogger("deriva_ml")
88
+ _logger: logging.Logger = PrivateAttr(default=10)
96
89
 
97
- @staticmethod
98
- def create_workflow(
99
- name: str,
100
- workflow_type: str,
101
- description: str = "",
102
- ) -> "Workflow":
90
+ @model_validator(mode="after")
91
+ def setup_url_checksum(self) -> "Workflow":
103
92
  """Creates a workflow from the current execution context.
104
93
 
105
94
  Identifies the currently executing program (script or notebook) and creates
@@ -128,24 +117,20 @@ class Workflow(BaseModel):
128
117
  ... description="Process sample data"
129
118
  ... )
130
119
  """
120
+ """Initializes logging for the workflow."""
131
121
 
132
122
  # Check to see if execution file info is being passed in by calling program.
133
123
  if "DERIVA_ML_WORKFLOW_URL" in os.environ:
134
- github_url = os.environ["DERIVA_ML_WORKFLOW_URL"]
135
- checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
136
- is_notebook = True
137
- else:
138
- path, is_notebook = Workflow._get_python_script()
139
- github_url, checksum = Workflow.get_url_and_checksum(path)
140
-
141
- return Workflow(
142
- name=name,
143
- url=github_url,
144
- checksum=checksum,
145
- description=description,
146
- workflow_type=workflow_type,
147
- is_notebook=is_notebook,
148
- )
124
+ self.url = os.environ["DERIVA_ML_WORKFLOW_URL"]
125
+ self.checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
126
+ self.is_notebook = True
127
+
128
+ if not self.url:
129
+ path, self.is_notebook = Workflow._get_python_script()
130
+ self.url, self.checksum = Workflow.get_url_and_checksum(path)
131
+
132
+ self._logger = logging.getLogger("deriva_ml")
133
+ return self
149
134
 
150
135
  @staticmethod
151
136
  def get_url_and_checksum(executable_path: Path) -> tuple[str, str]:
@@ -19,9 +19,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
19
19
  def __init__(self, description, epilog, **kwargs):
20
20
  BaseCLI.__init__(self, description, epilog, **kwargs)
21
21
  Workflow._check_nbstrip_status()
22
- self.parser.add_argument(
23
- "notebook_file", type=Path, help="Path to the notebook file"
24
- )
22
+ self.parser.add_argument("notebook_file", type=Path, help="Path to the notebook file")
25
23
 
26
24
  self.parser.add_argument(
27
25
  "--file",
@@ -39,7 +37,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
39
37
 
40
38
  self.parser.add_argument(
41
39
  "--log-output",
42
- action="store_false",
40
+ action="store_true",
43
41
  help="Display logging output from notebook.",
44
42
  )
45
43
 
@@ -60,9 +58,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
60
58
  help="Provide a parameter name and value to inject into the notebook.",
61
59
  )
62
60
 
63
- self.parser.add_argument(
64
- "--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None
65
- )
61
+ self.parser.add_argument("--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None)
66
62
 
67
63
  @staticmethod
68
64
  def _coerce_number(val: str):
@@ -95,7 +91,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
95
91
  parameters |= json.load(f)
96
92
 
97
93
  if not (notebook_file.is_file() and notebook_file.suffix == ".ipynb"):
98
- print("Notebook file must be an ipynb file.")
94
+ print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
99
95
  exit(1)
100
96
 
101
97
  os.environ["DERIVA_HOST"] = args.host
@@ -106,22 +102,20 @@ class DerivaMLRunNotebookCLI(BaseCLI):
106
102
  notebook_parameters = pm.inspect_notebook(notebook_file)
107
103
  if args.inspect:
108
104
  for param, value in notebook_parameters.items():
109
- print(
110
- f"{param}:{value['inferred_type_name']} (default {value['default']})"
111
- )
105
+ print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
112
106
  return
113
107
  else:
114
108
  notebook_parameters = (
115
- {"host": args.host, "catalog": args.catalog}
109
+ {"host": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
116
110
  | {k: v["default"] for k, v in notebook_parameters.items()}
117
111
  | parameters
118
112
  )
119
113
  print(f"Running notebook {notebook_file.name} with parameters:")
120
114
  for param, value in notebook_parameters.items():
121
115
  print(f" {param}:{value}")
122
- self.run_notebook(notebook_file.resolve(), parameters, args.kernel)
116
+ self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
123
117
 
124
- def run_notebook(self, notebook_file, parameters, kernel=None):
118
+ def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
125
119
  url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
126
120
  os.environ["DERIVA_ML_WORKFLOW_URL"] = url
127
121
  os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
@@ -133,6 +127,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
133
127
  output_path=notebook_output,
134
128
  parameters=parameters,
135
129
  kernel_name=kernel,
130
+ log_output=log,
136
131
  )
137
132
  catalog_id = execution_rid = None
138
133
  with Path(notebook_output).open("r") as f:
@@ -171,15 +166,13 @@ class DerivaMLRunNotebookCLI(BaseCLI):
171
166
 
172
167
  def main():
173
168
  """Main entry point for the notebook runner CLI.
174
-
169
+
175
170
  Creates and runs the DerivaMLRunNotebookCLI instance.
176
-
171
+
177
172
  Returns:
178
173
  None. Executes the CLI.
179
174
  """
180
- cli = DerivaMLRunNotebookCLI(
181
- description="Deriva ML Execution Script Demo", epilog=""
182
- )
175
+ cli = DerivaMLRunNotebookCLI(description="Deriva ML Execution Script Demo", epilog="")
183
176
  cli.main()
184
177
 
185
178
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.29
3
+ Version: 1.14.31
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -2,7 +2,7 @@
2
2
  Tests for the execution module.
3
3
  """
4
4
 
5
- import re
5
+ import json
6
6
  import subprocess
7
7
  from tempfile import TemporaryDirectory
8
8
 
@@ -24,6 +24,9 @@ class TestWorkflow:
24
24
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
25
25
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
26
26
  print("Running workflow-test.py ...")
27
+ workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
28
+ workflows = list(workflow_table.entities().fetch())
29
+ assert 0 == len(workflows)
27
30
  result = subprocess.run(
28
31
  [
29
32
  "python",
@@ -34,46 +37,64 @@ class TestWorkflow:
34
37
  capture_output=True,
35
38
  text=True,
36
39
  )
37
- print(result.stdout)
38
- m = re.match(".*url='(?P<url>.*?)'", result.stdout)
39
- url = m["url"]
40
- m = re.match(".*checksum='(?P<checksum>.*?)'", result.stdout)
41
- checksum = m["checksum"]
42
- m = re.match(".*is_notebook=(?P<is_notebook>True|False)", result.stdout)
43
- is_notebook = m["is_notebook"]
44
- print("URL", url)
45
- print("checksum", checksum)
46
- print("is_notebook", is_notebook)
47
- assert is_notebook == "False"
48
- assert url.endswith("workflow-test.py")
49
-
50
- def test_workflow_creation_notebook(self, test_ml):
40
+
41
+ workflows = list(workflow_table.entities().fetch())
42
+ assert 1 == len(workflows)
43
+ workflow_rid = workflows[0]["RID"]
44
+ workflow_url = workflows[0]["URL"]
45
+
46
+ workflow_rid = ml_instance.lookup_workflow(workflow_url)
47
+
48
+ assert workflow_url.endswith("workflow-test.py")
49
+
50
+ # Make sure that workflow is not duplicated if created again.
51
+ result = subprocess.run(
52
+ [
53
+ "python",
54
+ "execution/workflow-test.py",
55
+ ml_instance.catalog.deriva_server.server,
56
+ ml_instance.catalog_id,
57
+ ],
58
+ capture_output=True,
59
+ text=True,
60
+ )
61
+ new_workflow = result.stdout.strip()
62
+ assert new_workflow == workflow_rid
63
+
64
+ def test_workflow_creation_notebook(self, test_ml, tmp_path):
51
65
  ml_instance = test_ml
52
66
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
53
67
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
68
+ workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
69
+ workflows = list(workflow_table.entities().fetch())
70
+ assert 0 == len(workflows)
71
+
72
+ config_file = tmp_path / "config.json"
73
+ with config_file.open("w") as fp:
74
+ json.dump({"host": ml_instance.catalog.deriva_server.server, "catalog_id": ml_instance.catalog_id}, fp)
75
+
54
76
  print("Running notebook...")
55
77
  result = subprocess.run(
56
78
  [
57
79
  "deriva-ml-run-notebook",
58
- "execution/workflow-test.ipnb",
80
+ "execution/workflow-test.ipynb",
59
81
  "--host",
60
- "localhost",
82
+ ml_instance.catalog.deriva_server.server,
83
+ "--catalog",
84
+ ml_instance.catalog_id,
85
+ "--log-output",
61
86
  ],
62
87
  capture_output=True,
63
88
  text=True,
64
89
  )
65
90
  print(result)
66
- m = re.match(".*url='(?P<url>.*?)'", result.stdout)
67
- url = m["url"]
68
- m = re.match(".*checksum='(?P<checksum>.*?)'", result.stdout)
69
- checksum = m["checksum"]
70
- m = re.match(".*is_notebook=(?P<is_notebook>True|False)", result.stdout)
71
- is_notebook = m["is_notebook"]
72
- print("URL", url)
73
- print("checksum", checksum)
74
- print("is_notebook", is_notebook)
75
- assert is_notebook == "False"
76
- assert url.endswith("workflow-test.py")
91
+ workflows = list(workflow_table.entities().fetch())
92
+ assert 1 == len(workflows)
93
+ workflow_rid = workflows[0]["RID"]
94
+ workflow_url = workflows[0]["URL"]
95
+
96
+ print(workflow_url)
97
+ print(workflow_rid)
77
98
 
78
99
 
79
100
  class TestExecution:
@@ -11,63 +11,68 @@
11
11
  ]
12
12
  },
13
13
  {
14
- "metadata": {
15
- "ExecuteTime": {
16
- "end_time": "2025-04-18T20:05:17.505817Z",
17
- "start_time": "2025-04-18T20:05:16.235245Z"
18
- }
19
- },
20
14
  "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "1",
17
+ "metadata": {},
18
+ "outputs": [],
21
19
  "source": [
22
20
  "from deriva_ml import DerivaML, MLVocab as vc\n",
23
- "import os"
24
- ],
25
- "id": "2c6cbcfe94814682",
26
- "outputs": [],
27
- "execution_count": 1
21
+ "import os\n",
22
+ "import logging\n",
23
+ "logger = logging.getLogger()"
24
+ ]
28
25
  },
29
26
  {
30
- "metadata": {},
31
27
  "cell_type": "markdown",
32
- "source": "## Parameters cell\n",
33
- "id": "2af30ff79278ba00"
28
+ "id": "2",
29
+ "metadata": {},
30
+ "source": [
31
+ "## Parameters cell\n"
32
+ ]
34
33
  },
35
34
  {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "id": "3",
36
38
  "metadata": {
37
39
  "tags": [
38
40
  "parameters"
39
41
  ]
40
42
  },
41
- "cell_type": "code",
42
43
  "outputs": [],
43
- "execution_count": null,
44
44
  "source": [
45
45
  "hostname = None\n",
46
46
  "catalog_id = None"
47
- ],
48
- "id": "3"
47
+ ]
49
48
  },
50
49
  {
51
- "metadata": {},
52
50
  "cell_type": "code",
53
- "outputs": [],
54
51
  "execution_count": null,
52
+ "id": "4",
53
+ "metadata": {},
54
+ "outputs": [],
55
55
  "source": [
56
56
  "# Modify these to your desired server and catalog.\n",
57
57
  "hostname = hostname or os.environ.get(\"DERIVA_HOST\")\n",
58
58
  "catalog_id = catalog_id or os.environ.get(\"DERIVA_CATALOG_ID\")\n",
59
59
  "\n",
60
60
  "# Change this line to call the domain specific class derived from DerivaML\n",
61
- "deriva_ml = DerivaML(hostname=hostname, catalog_id=catalog_id)\n",
62
- "deriva_ml.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
63
- "deriva_ml.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
64
- "api_workflow = deriva_ml.create_workflow(\n",
61
+ "ml_instance = DerivaML(hostname, catalog_id)\n",
62
+ "logger.info(\"Got ML instance:\")\n",
63
+ "\n",
64
+ "ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
65
+ "ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
66
+ "print(\"Added terms to ML instance\")\n",
67
+ "api_workflow = ml_instance.create_workflow(\n",
65
68
  " name=\"Test Workflow One\",\n",
66
69
  " workflow_type=\"Test Workflow\",\n",
67
70
  " description=\"A test operation\",\n",
68
- ")\n"
69
- ],
70
- "id": "1"
71
+ ")\n",
72
+ "logger.info(f\"URL: {api_workflow.url}\")\n",
73
+ "rid = ml_instance.add_workflow(api_workflow)\n",
74
+ "logger.info(f\"RID {rid}\")\n"
75
+ ]
71
76
  }
72
77
  ],
73
78
  "metadata": {
@@ -16,4 +16,5 @@ api_workflow = ml_instance.create_workflow(
16
16
  workflow_type="Test Workflow",
17
17
  description="A test operation",
18
18
  )
19
- print(api_workflow)
19
+ rid = ml_instance.add_workflow(api_workflow)
20
+ print(rid)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes