deriva-ml 1.14.29__tar.gz → 1.14.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. {deriva_ml-1.14.29/src/deriva_ml.egg-info → deriva_ml-1.14.30}/PKG-INFO +1 -1
  2. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/base.py +14 -7
  3. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/execution/workflow.py +28 -39
  4. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/run_notebook.py +12 -19
  5. {deriva_ml-1.14.29 → deriva_ml-1.14.30/src/deriva_ml.egg-info}/PKG-INFO +1 -1
  6. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/execution/test_execution.py +49 -28
  7. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/execution/workflow-test.ipynb +32 -27
  8. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/execution/workflow-test.py +2 -1
  9. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/.github/release-drafter.yml +0 -0
  10. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/.github/workflows/publish-docs.yml +0 -0
  11. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/.github/workflows/release.yml +0 -0
  12. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/.gitignore +0 -0
  13. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/LICENSE +0 -0
  14. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/README.md +0 -0
  15. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/.DS_Store +0 -0
  16. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/Notebooks/DerivaML Create Notes.ipynb +0 -0
  17. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/Notebooks/DerivaML Dataset.ipynb +0 -0
  18. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/Notebooks/DerivaML Execution.ipynb +0 -0
  19. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/Notebooks/DerivaML Features.ipynb +0 -0
  20. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/Notebooks/DerivaML Ingest.ipynb +0 -0
  21. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/Notebooks/DerivaML Vocabulary.ipynb +0 -0
  22. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/assets/ERD.png +0 -0
  23. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/assets/Launcher.png +0 -0
  24. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/assets/copy_minid.png +0 -0
  25. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/assets/deriva-logo.png +0 -0
  26. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/assets/deriva-ml.pdf +0 -0
  27. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/assets/sharing-at-home.pdf +0 -0
  28. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/dataset.md +0 -0
  29. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/dataset_aux_classes.md +0 -0
  30. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/dataset_bag.md +0 -0
  31. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/deriva_definitions.md +0 -0
  32. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/deriva_ml_base.md +0 -0
  33. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/deriva_model.md +0 -0
  34. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/execution.md +0 -0
  35. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/execution_configuration.md +0 -0
  36. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/feature.md +0 -0
  37. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/code-docs/upload.md +0 -0
  38. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/index.md +0 -0
  39. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/release-notes.md +0 -0
  40. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/datasets.md +0 -0
  41. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/deriva_ml_structure.md +0 -0
  42. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/execution-configuration.md +0 -0
  43. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/file-assets.md +0 -0
  44. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/identifiers.md +0 -0
  45. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/install.md +0 -0
  46. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/notebooks.md +0 -0
  47. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/docs/user-guide/overview.md +0 -0
  48. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/mkdocs.yml +0 -0
  49. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/pyproject.toml +0 -0
  50. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/release.sh +0 -0
  51. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/setup.cfg +0 -0
  52. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/__init__.py +0 -0
  53. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/__init__.py +0 -0
  54. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/constants.py +0 -0
  55. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/definitions.py +0 -0
  56. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/enums.py +0 -0
  57. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/ermrest.py +0 -0
  58. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/exceptions.py +0 -0
  59. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/core/filespec.py +0 -0
  60. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/dataset/__init__.py +0 -0
  61. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/dataset/aux_classes.py +0 -0
  62. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/dataset/dataset.py +0 -0
  63. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/dataset/dataset_bag.py +0 -0
  64. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/dataset/history.py +0 -0
  65. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/dataset/upload.py +0 -0
  66. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/demo_catalog.py +0 -0
  67. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/execution/__init__.py +0 -0
  68. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/execution/environment.py +0 -0
  69. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/execution/execution.py +0 -0
  70. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/execution/execution_configuration.py +0 -0
  71. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/feature.py +0 -0
  72. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/model/__init__.py +0 -0
  73. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/model/catalog.py +0 -0
  74. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/model/database.py +0 -0
  75. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/model/sql_mapper.py +0 -0
  76. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/__init__.py +0 -0
  77. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/annotations.py +0 -0
  78. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/check_schema.py +0 -0
  79. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/create_schema.py +0 -0
  80. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/deriva-ml-reference.json +0 -0
  81. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/policy.json +0 -0
  82. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml/schema/table_comments_utils.py +0 -0
  83. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml.egg-info/SOURCES.txt +0 -0
  84. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml.egg-info/dependency_links.txt +0 -0
  85. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml.egg-info/entry_points.txt +0 -0
  86. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml.egg-info/requires.txt +0 -0
  87. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/src/deriva_ml.egg-info/top_level.txt +0 -0
  88. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/__init__.py +0 -0
  89. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/conftest.py +0 -0
  90. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/core/__init__.py +0 -0
  91. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/core/test_basic_tables.py +0 -0
  92. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/core/test_file.py +0 -0
  93. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/core/test_vocabulary.py +0 -0
  94. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/__init__.py +0 -0
  95. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/demo-catalog-schema.json +0 -0
  96. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/deriva-ml-reference.json +0 -0
  97. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/eye-ai-catalog-schema.json +0 -0
  98. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/test_dataset_export.py +0 -0
  99. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/test_dataset_version.py +0 -0
  100. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/test_datasets.py +0 -0
  101. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/dataset/test_download.py +0 -0
  102. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/execution/__init__.py +0 -0
  103. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/feature/test_features.py +0 -0
  104. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/model/__init__.py +0 -0
  105. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/model/test_database.py +0 -0
  106. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/model/test_models.py +0 -0
  107. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/test-files/execution-parameters.json +0 -0
  108. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/test-files/notebook-parameters.json +0 -0
  109. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/tests/test_utils.py +0 -0
  110. {deriva_ml-1.14.29 → deriva_ml-1.14.30}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.29
3
+ Version: 1.14.30
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -1305,7 +1305,9 @@ class DerivaML(Dataset):
1305
1305
  def add_workflow(self, workflow: Workflow) -> RID:
1306
1306
  """Adds a workflow to the catalog.
1307
1307
 
1308
- Registers a new workflow in the catalog or returns the RID of an existing workflow with the same URL.
1308
+ Registers a new workflow in the catalog or returns the RID of an existing workflow with the same
1309
+ URL or checksum.
1310
+
1309
1311
  Each workflow represents a specific computational process or analysis pipeline.
1310
1312
 
1311
1313
  Args:
@@ -1328,11 +1330,12 @@ class DerivaML(Dataset):
1328
1330
  >>> workflow_rid = ml.add_workflow(workflow)
1329
1331
  """
1330
1332
  # Check if a workflow already exists, keyed on its checksum when set, otherwise on its URL
1331
- if workflow_rid := self.lookup_workflow(workflow.url):
1333
+ if workflow_rid := self.lookup_workflow(workflow.checksum or workflow.url):
1332
1334
  return workflow_rid
1333
1335
 
1334
1336
  # Get an ML schema path for the workflow table
1335
1337
  ml_schema_path = self.pathBuilder.schemas[self.ml_schema]
1338
+
1336
1339
  try:
1337
1340
  # Create a workflow record
1338
1341
  workflow_record = {
@@ -1350,12 +1353,11 @@ class DerivaML(Dataset):
1350
1353
  raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
1351
1354
  return workflow_rid
1352
1355
 
1353
- def lookup_workflow(self, url: str) -> RID | None:
1356
+ def lookup_workflow(self, url_or_checksum: str) -> RID | None:
1354
1357
  """Finds a workflow by URL or checksum.
1355
1358
 
1356
1359
  Args:
1357
- url: URL of the workflow to find.
1358
-
1360
+ url_or_checksum: URL or checksum of the workflow.
1359
1361
  Returns:
1360
1362
  RID: Resource Identifier of the workflow if found, None otherwise.
1361
1363
 
@@ -1369,7 +1371,12 @@ class DerivaML(Dataset):
1369
1371
  try:
1370
1372
  # Search for a workflow whose URL or checksum matches
1371
1373
  url_column = workflow_path.URL
1372
- return list(workflow_path.filter(url_column == url).entities())[0]["RID"]
1374
+ checksum_column = workflow_path.Checksum
1375
+ return list(
1376
+ workflow_path.path.filter(
1377
+ (url_column == url_or_checksum) | (checksum_column == url_or_checksum)
1378
+ ).entities()
1379
+ )[0]["RID"]
1373
1380
  except IndexError:
1374
1381
  return None
1375
1382
 
@@ -1403,7 +1410,7 @@ class DerivaML(Dataset):
1403
1410
  self.lookup_term(MLVocab.workflow_type, workflow_type)
1404
1411
 
1405
1412
  # Create and return a new workflow object
1406
- return Workflow.create_workflow(name, workflow_type, description)
1413
+ return Workflow(name=name, workflow_type=workflow_type, description=description)
1407
1414
 
1408
1415
  def create_execution(self, configuration: ExecutionConfiguration, dry_run: bool = False) -> "Execution":
1409
1416
  """Creates an execution environment.
@@ -7,10 +7,6 @@ from pathlib import Path
7
7
  from typing import Any
8
8
 
9
9
  import requests
10
- from pydantic import (
11
- BaseModel,
12
- PrivateAttr,
13
- )
14
10
  from requests import RequestException
15
11
 
16
12
  from deriva_ml.core.definitions import RID
@@ -52,7 +48,7 @@ except ImportError:
52
48
  return get_connection_file()
53
49
 
54
50
 
55
- class Workflow(BaseModel):
51
+ class Workflow:
56
52
  """Represents a computational workflow in DerivaML.
57
53
 
58
54
  A workflow defines a computational process or analysis pipeline. Each workflow has
@@ -79,27 +75,17 @@ class Workflow(BaseModel):
79
75
  ... )
80
76
  """
81
77
 
82
- name: str
83
- url: str
84
- workflow_type: str
85
- version: str | None = None
86
- description: str | None = None
87
- rid: RID | None = None
88
- checksum: str | None = None
89
- is_notebook: bool = False
90
-
91
- _logger: Any = PrivateAttr()
92
-
93
- def __post_init__(self):
94
- """Initializes logging for the workflow."""
95
- self._logger = logging.getLogger("deriva_ml")
96
-
97
- @staticmethod
98
- def create_workflow(
78
+ def __init__(
79
+ self,
99
80
  name: str,
100
81
  workflow_type: str,
101
- description: str = "",
102
- ) -> "Workflow":
82
+ description: str | None = None,
83
+ url: str | None = None,
84
+ version: str | None = None,
85
+ rid: RID | None = None,
86
+ checksum: str | None = None,
87
+ is_notebook: bool = False,
88
+ ):
103
89
  """Creates a workflow from the current execution context.
104
90
 
105
91
  Identifies the currently executing program (script or notebook) and creates
@@ -128,24 +114,27 @@ class Workflow(BaseModel):
128
114
  ... description="Process sample data"
129
115
  ... )
130
116
  """
117
+ self.name = name
118
+ self.url = url
119
+ self.workflow_type = workflow_type
120
+ self.version = version
121
+ self.description = description
122
+ self.rid = rid
123
+ self.checksum = checksum
124
+ self.is_notebook = is_notebook
125
+ """Initializes logging for the workflow."""
131
126
 
132
127
  # Check to see if execution file info is being passed in by calling program.
133
128
  if "DERIVA_ML_WORKFLOW_URL" in os.environ:
134
- github_url = os.environ["DERIVA_ML_WORKFLOW_URL"]
135
- checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
136
- is_notebook = True
137
- else:
138
- path, is_notebook = Workflow._get_python_script()
139
- github_url, checksum = Workflow.get_url_and_checksum(path)
140
-
141
- return Workflow(
142
- name=name,
143
- url=github_url,
144
- checksum=checksum,
145
- description=description,
146
- workflow_type=workflow_type,
147
- is_notebook=is_notebook,
148
- )
129
+ self.url = os.environ["DERIVA_ML_WORKFLOW_URL"]
130
+ self.checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
131
+ self.is_notebook = True
132
+
133
+ if not self.url:
134
+ path, self.is_notebook = Workflow._get_python_script()
135
+ self.url, self.checksum = Workflow.get_url_and_checksum(path)
136
+
137
+ self._logger = logging.getLogger("deriva_ml")
149
138
 
150
139
  @staticmethod
151
140
  def get_url_and_checksum(executable_path: Path) -> tuple[str, str]:
@@ -19,9 +19,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
19
19
  def __init__(self, description, epilog, **kwargs):
20
20
  BaseCLI.__init__(self, description, epilog, **kwargs)
21
21
  Workflow._check_nbstrip_status()
22
- self.parser.add_argument(
23
- "notebook_file", type=Path, help="Path to the notebook file"
24
- )
22
+ self.parser.add_argument("notebook_file", type=Path, help="Path to the notebook file")
25
23
 
26
24
  self.parser.add_argument(
27
25
  "--file",
@@ -39,7 +37,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
39
37
 
40
38
  self.parser.add_argument(
41
39
  "--log-output",
42
- action="store_false",
40
+ action="store_true",
43
41
  help="Display logging output from notebook.",
44
42
  )
45
43
 
@@ -60,9 +58,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
60
58
  help="Provide a parameter name and value to inject into the notebook.",
61
59
  )
62
60
 
63
- self.parser.add_argument(
64
- "--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None
65
- )
61
+ self.parser.add_argument("--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None)
66
62
 
67
63
  @staticmethod
68
64
  def _coerce_number(val: str):
@@ -95,7 +91,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
95
91
  parameters |= json.load(f)
96
92
 
97
93
  if not (notebook_file.is_file() and notebook_file.suffix == ".ipynb"):
98
- print("Notebook file must be an ipynb file.")
94
+ print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
99
95
  exit(1)
100
96
 
101
97
  os.environ["DERIVA_HOST"] = args.host
@@ -106,22 +102,20 @@ class DerivaMLRunNotebookCLI(BaseCLI):
106
102
  notebook_parameters = pm.inspect_notebook(notebook_file)
107
103
  if args.inspect:
108
104
  for param, value in notebook_parameters.items():
109
- print(
110
- f"{param}:{value['inferred_type_name']} (default {value['default']})"
111
- )
105
+ print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
112
106
  return
113
107
  else:
114
108
  notebook_parameters = (
115
- {"host": args.host, "catalog": args.catalog}
109
+ {"host": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
116
110
  | {k: v["default"] for k, v in notebook_parameters.items()}
117
111
  | parameters
118
112
  )
119
113
  print(f"Running notebook {notebook_file.name} with parameters:")
120
114
  for param, value in notebook_parameters.items():
121
115
  print(f" {param}:{value}")
122
- self.run_notebook(notebook_file.resolve(), parameters, args.kernel)
116
+ self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
123
117
 
124
- def run_notebook(self, notebook_file, parameters, kernel=None):
118
+ def run_notebook(self, notebook_file, parameters, kernel=None, log=False):
125
119
  url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
126
120
  os.environ["DERIVA_ML_WORKFLOW_URL"] = url
127
121
  os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
@@ -133,6 +127,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
133
127
  output_path=notebook_output,
134
128
  parameters=parameters,
135
129
  kernel_name=kernel,
130
+ log_output=log,
136
131
  )
137
132
  catalog_id = execution_rid = None
138
133
  with Path(notebook_output).open("r") as f:
@@ -171,15 +166,13 @@ class DerivaMLRunNotebookCLI(BaseCLI):
171
166
 
172
167
  def main():
173
168
  """Main entry point for the notebook runner CLI.
174
-
169
+
175
170
  Creates and runs the DerivaMLRunNotebookCLI instance.
176
-
171
+
177
172
  Returns:
178
173
  None. Executes the CLI.
179
174
  """
180
- cli = DerivaMLRunNotebookCLI(
181
- description="Deriva ML Execution Script Demo", epilog=""
182
- )
175
+ cli = DerivaMLRunNotebookCLI(description="Deriva ML Execution Script Demo", epilog="")
183
176
  cli.main()
184
177
 
185
178
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.29
3
+ Version: 1.14.30
4
4
  Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
@@ -2,7 +2,7 @@
2
2
  Tests for the execution module.
3
3
  """
4
4
 
5
- import re
5
+ import json
6
6
  import subprocess
7
7
  from tempfile import TemporaryDirectory
8
8
 
@@ -24,6 +24,9 @@ class TestWorkflow:
24
24
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
25
25
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
26
26
  print("Running workflow-test.py ...")
27
+ workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
28
+ workflows = list(workflow_table.entities().fetch())
29
+ assert 0 == len(workflows)
27
30
  result = subprocess.run(
28
31
  [
29
32
  "python",
@@ -34,46 +37,64 @@ class TestWorkflow:
34
37
  capture_output=True,
35
38
  text=True,
36
39
  )
37
- print(result.stdout)
38
- m = re.match(".*url='(?P<url>.*?)'", result.stdout)
39
- url = m["url"]
40
- m = re.match(".*checksum='(?P<checksum>.*?)'", result.stdout)
41
- checksum = m["checksum"]
42
- m = re.match(".*is_notebook=(?P<is_notebook>True|False)", result.stdout)
43
- is_notebook = m["is_notebook"]
44
- print("URL", url)
45
- print("checksum", checksum)
46
- print("is_notebook", is_notebook)
47
- assert is_notebook == "False"
48
- assert url.endswith("workflow-test.py")
49
-
50
- def test_workflow_creation_notebook(self, test_ml):
40
+
41
+ workflows = list(workflow_table.entities().fetch())
42
+ assert 1 == len(workflows)
43
+ workflow_rid = workflows[0]["RID"]
44
+ workflow_url = workflows[0]["URL"]
45
+
46
+ workflow_rid = ml_instance.lookup_workflow(workflow_url)
47
+
48
+ assert workflow_url.endswith("workflow-test.py")
49
+
50
+ # Make sure that workflow is not duplicated if created again.
51
+ result = subprocess.run(
52
+ [
53
+ "python",
54
+ "execution/workflow-test.py",
55
+ ml_instance.catalog.deriva_server.server,
56
+ ml_instance.catalog_id,
57
+ ],
58
+ capture_output=True,
59
+ text=True,
60
+ )
61
+ new_workflow = result.stdout.strip()
62
+ assert new_workflow == workflow_rid
63
+
64
+ def test_workflow_creation_notebook(self, test_ml, tmp_path):
51
65
  ml_instance = test_ml
52
66
  ml_instance.add_term(vc.asset_type, "Test Model", description="Model for our Test workflow")
53
67
  ml_instance.add_term(vc.workflow_type, "Test Workflow", description="A ML Workflow that uses Deriva ML API")
68
+ workflow_table = ml_instance.pathBuilder.schemas[ml_instance.ml_schema].Workflow
69
+ workflows = list(workflow_table.entities().fetch())
70
+ assert 0 == len(workflows)
71
+
72
+ config_file = tmp_path / "config.json"
73
+ with config_file.open("w") as fp:
74
+ json.dump({"host": ml_instance.catalog.deriva_server.server, "catalog_id": ml_instance.catalog_id}, fp)
75
+
54
76
  print("Running notebook...")
55
77
  result = subprocess.run(
56
78
  [
57
79
  "deriva-ml-run-notebook",
58
- "execution/workflow-test.ipnb",
80
+ "execution/workflow-test.ipynb",
59
81
  "--host",
60
- "localhost",
82
+ ml_instance.catalog.deriva_server.server,
83
+ "--catalog",
84
+ ml_instance.catalog_id,
85
+ "--log-output",
61
86
  ],
62
87
  capture_output=True,
63
88
  text=True,
64
89
  )
65
90
  print(result)
66
- m = re.match(".*url='(?P<url>.*?)'", result.stdout)
67
- url = m["url"]
68
- m = re.match(".*checksum='(?P<checksum>.*?)'", result.stdout)
69
- checksum = m["checksum"]
70
- m = re.match(".*is_notebook=(?P<is_notebook>True|False)", result.stdout)
71
- is_notebook = m["is_notebook"]
72
- print("URL", url)
73
- print("checksum", checksum)
74
- print("is_notebook", is_notebook)
75
- assert is_notebook == "False"
76
- assert url.endswith("workflow-test.py")
91
+ workflows = list(workflow_table.entities().fetch())
92
+ assert 1 == len(workflows)
93
+ workflow_rid = workflows[0]["RID"]
94
+ workflow_url = workflows[0]["URL"]
95
+
96
+ print(workflow_url)
97
+ print(workflow_rid)
77
98
 
78
99
 
79
100
  class TestExecution:
@@ -11,63 +11,68 @@
11
11
  ]
12
12
  },
13
13
  {
14
- "metadata": {
15
- "ExecuteTime": {
16
- "end_time": "2025-04-18T20:05:17.505817Z",
17
- "start_time": "2025-04-18T20:05:16.235245Z"
18
- }
19
- },
20
14
  "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "1",
17
+ "metadata": {},
18
+ "outputs": [],
21
19
  "source": [
22
20
  "from deriva_ml import DerivaML, MLVocab as vc\n",
23
- "import os"
24
- ],
25
- "id": "2c6cbcfe94814682",
26
- "outputs": [],
27
- "execution_count": 1
21
+ "import os\n",
22
+ "import logging\n",
23
+ "logger = logging.getLogger()"
24
+ ]
28
25
  },
29
26
  {
30
- "metadata": {},
31
27
  "cell_type": "markdown",
32
- "source": "## Parameters cell\n",
33
- "id": "2af30ff79278ba00"
28
+ "id": "2",
29
+ "metadata": {},
30
+ "source": [
31
+ "## Parameters cell\n"
32
+ ]
34
33
  },
35
34
  {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "id": "3",
36
38
  "metadata": {
37
39
  "tags": [
38
40
  "parameters"
39
41
  ]
40
42
  },
41
- "cell_type": "code",
42
43
  "outputs": [],
43
- "execution_count": null,
44
44
  "source": [
45
45
  "hostname = None\n",
46
46
  "catalog_id = None"
47
- ],
48
- "id": "3"
47
+ ]
49
48
  },
50
49
  {
51
- "metadata": {},
52
50
  "cell_type": "code",
53
- "outputs": [],
54
51
  "execution_count": null,
52
+ "id": "4",
53
+ "metadata": {},
54
+ "outputs": [],
55
55
  "source": [
56
56
  "# Modify these to your desired server and catalog.\n",
57
57
  "hostname = hostname or os.environ.get(\"DERIVA_HOST\")\n",
58
58
  "catalog_id = catalog_id or os.environ.get(\"DERIVA_CATALOG_ID\")\n",
59
59
  "\n",
60
60
  "# Change this line to call the domain specific class derived from DerivaML\n",
61
- "deriva_ml = DerivaML(hostname=hostname, catalog_id=catalog_id)\n",
62
- "deriva_ml.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
63
- "deriva_ml.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
64
- "api_workflow = deriva_ml.create_workflow(\n",
61
+ "ml_instance = DerivaML(hostname, catalog_id)\n",
62
+ "logger.info(\"Got ML instance:\")\n",
63
+ "\n",
64
+ "ml_instance.add_term(vc.asset_type, \"Test Model\", description=\"Model for our Test workflow\")\n",
65
+ "ml_instance.add_term(vc.workflow_type, \"Test Workflow\", description=\"A ML Workflow that uses Deriva ML API\")\n",
66
+ "print(\"Added terms to ML instance\")\n",
67
+ "api_workflow = ml_instance.create_workflow(\n",
65
68
  " name=\"Test Workflow One\",\n",
66
69
  " workflow_type=\"Test Workflow\",\n",
67
70
  " description=\"A test operation\",\n",
68
- ")\n"
69
- ],
70
- "id": "1"
71
+ ")\n",
72
+ "logger.info(f\"URL: {api_workflow.url}\")\n",
73
+ "rid = ml_instance.add_workflow(api_workflow)\n",
74
+ "logger.info(f\"RID {rid}\")\n"
75
+ ]
71
76
  }
72
77
  ],
73
78
  "metadata": {
@@ -16,4 +16,5 @@ api_workflow = ml_instance.create_workflow(
16
16
  workflow_type="Test Workflow",
17
17
  description="A test operation",
18
18
  )
19
- print(api_workflow)
19
+ rid = ml_instance.add_workflow(api_workflow)
20
+ print(rid)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes