deriva-ml 1.13.0__py3-none-any.whl → 1.13.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/__init__.py CHANGED
@@ -15,7 +15,7 @@ __all__ = [
     "UploadState",
     "MLVocab",
     "MLAsset",
-    "ExecMetadataVocab",
+    "ExecAssetType",
     "RID",
     "DerivaSystemColumns",
     "VersionPart",
@@ -34,7 +34,7 @@ from .deriva_definitions import (
     DerivaMLException,
     MLVocab,
     MLAsset,
-    ExecMetadataVocab,
+    ExecAssetType,
     DerivaSystemColumns,
 )
 from .deriva_ml_base import DerivaML
deriva_ml/dataset.py CHANGED
@@ -964,7 +964,8 @@ class Dataset:
         for the dataset.
         """
         if (
-            execution_rid != DRY_RUN_RID
+            execution_rid
+            and execution_rid != DRY_RUN_RID
             and self._model.catalog.resolve_rid(execution_rid).table.name != "Execution"
         ):
             raise DerivaMLException(f"RID {execution_rid} is not an execution")
@@ -1120,17 +1121,18 @@ class Dataset:

         def update_status(status: Status, msg: str) -> None:
             """Update the current status for this execution in the catalog"""
-            self._model.catalog.getPathBuilder().schemas[
-                self._ml_schema
-            ].Execution.update(
-                [
-                    {
-                        "RID": execution_rid,
-                        "Status": status.value,
-                        "Status_Detail": msg,
-                    }
-                ]
-            )
+            if execution_rid and execution_rid != DRY_RUN_RID:
+                self._model.catalog.getPathBuilder().schemas[
+                    self._ml_schema
+                ].Execution.update(
+                    [
+                        {
+                            "RID": execution_rid,
+                            "Status": status.value,
+                            "Status_Detail": msg,
+                        }
+                    ]
+                )
             self._logger.info(msg)

         def fetch_progress_callback(current, total):
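Note: both dataset.py hunks apply the same dry-run guard: catalog writes are skipped whenever execution_rid is empty or is the DRY_RUN_RID sentinel, while local logging still happens. A minimal standalone sketch of the pattern (the catalog and logger objects and the "deriva-ml" schema name are illustrative placeholders; DRY_RUN_RID is the real constant from deriva_ml.deriva_definitions):

    from deriva_ml.deriva_definitions import DRY_RUN_RID

    def update_status(execution_rid, status, msg, catalog, logger):
        # Only a real execution record is updated in the catalog; a dry run
        # (or a missing RID) skips the write but still logs locally.
        if execution_rid and execution_rid != DRY_RUN_RID:
            catalog.getPathBuilder().schemas["deriva-ml"].Execution.update(
                [{"RID": execution_rid, "Status": status.value, "Status_Detail": msg}]
            )
        logger.info(msg)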
deriva_ml/deriva_definitions.py CHANGED
@@ -197,7 +197,7 @@ class MLAsset(StrEnum):
     execution_asset = "Execution_Asset"


-class ExecMetadataVocab(StrEnum):
+class ExecMetadataType(StrEnum):
     """
     Predefined execution metadata types.
     """
@@ -206,6 +206,16 @@ class ExecMetadataVocab(StrEnum):
     runtime_env = "Runtime_Env"


+class ExecAssetType(StrEnum):
+    """
+    Predefined execution metadata types.
+    """
+
+    input_file = "Input_File"
+    output_file = "Output_File"
+    notebook_output = "Notebook_Output"
+
+
 class ColumnDefinition(BaseModel):
     """Pydantic model for deriva_py Column.define"""

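Note: ExecMetadataVocab has been renamed to ExecMetadataType, and the new ExecAssetType enum carries the predefined Asset_Type terms for execution assets. Since these are string-valued enums, members compare equal to their term strings and can be passed wherever a bare term string was used before. A small illustration (the StrEnum fallback shown is an assumption for interpreters older than 3.11, where enum.StrEnum does not exist):

    import sys

    if sys.version_info >= (3, 11):
        from enum import StrEnum
    else:
        from enum import Enum

        class StrEnum(str, Enum):  # minimal stand-in for enum.StrEnum
            pass

    class ExecAssetType(StrEnum):
        input_file = "Input_File"
        output_file = "Output_File"
        notebook_output = "Notebook_Output"

    # Members are strings, so .value is optional at most call sites:
    assert ExecAssetType.input_file == "Input_File"
    assert ExecAssetType.notebook_output.value == "Notebook_Output"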
deriva_ml/deriva_ml_base.py CHANGED
@@ -974,7 +974,7 @@ class DerivaML(Dataset):
     ) -> Workflow:
         """Identify current executing program and return a workflow RID for it

-        Determine the notebook or script that is currently being executed. Assume that this is
+        Determine the notebook or script that is currently being executed. Assume that this is
         being executed from a cloned GitHub repository. Determine the remote repository name for
         this object. Then either retrieve an existing workflow for this executable or create
         a new one.
@@ -983,6 +983,9 @@ class DerivaML(Dataset):
             name: The name of the workflow.
             workflow_type: The type of the workflow.
             description: The description of the workflow.
+
+        Returns:
+            A workflow object.
         """
         # Make sure type is correct.
         self.lookup_term(MLVocab.workflow_type, workflow_type)
@@ -1001,6 +1004,9 @@ class DerivaML(Dataset):
         1. The datasets specified in the configuration are downloaded and placed in the cache-dir. If a version is
         not specified in the configuration, then a new minor version number is created for the dataset and downloaded.

+        2. If any execution assets are provided in the configuration, they are downloaded and placed in the working directory.
+
+
         Args:
             configuration: ExecutionConfiguration:
             dry_run: Do not create an execution record or upload results.
deriva_ml/execution.py CHANGED
@@ -12,15 +12,12 @@ import os
 from pathlib import Path

 from pydantic import validate_call, ConfigDict
-import regex as re
 import sys
 import shutil
 from typing import Iterable, Any, Optional

 from deriva.core import format_exception
-from deriva.core.datapath import DataPathException
 from deriva.core.hatrac_store import HatracStore
-from .deriva_definitions import ExecMetadataVocab
 from .deriva_definitions import (
     RID,
     Status,
@@ -28,6 +25,8 @@ from .deriva_definitions import (
     DerivaMLException,
     MLVocab,
     MLAsset,
+    ExecMetadataType,
+    ExecAssetType,
     DRY_RUN_RID,
 )
 from .deriva_ml_base import DerivaML, FeatureRecord
@@ -65,29 +64,43 @@ except ImportError:
         return s


-class AssetFilePath(type(Path())):
-    """Derived class of Path that also includes information about a downloaded.
+# Platform-specific base class
+if sys.version_info >= (3, 12):

-    An AssetFilePath has all the methods associated with a pathlib.Path object. In addition, it defines additional
-    attributes associated with a DerviaML asset.
+    class AssetFilePath(Path):
+        """
+        Create a new Path object that has additional information related to the use of this path as an asset.

-    Attributes:
-        asset_types: A list of the types associated with this asset. From the Asset_Type controlled vocabulary.
-        asset_metadata: A dictionary of names and values of any additional columns associated with this asset.
-        asset_name: The name of the asset table
-        file_name: The name of the file in the local file system that has the asset contents
-        asset_rid: The RID of the asset if it has been uploaded into an asset table
-    """
+        Args:
+            asset_path: Local path to the location of the asset.
+            asset_name: The name of the asset in the catalog (e.g. the asset table name).
+            file_name: Name of the local file that contains the contents of the asset.
+            asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
+            asset_types: A list of terms from the Asset_Type controlled vocabulary.
+            asset_rid: The RID of the asset if it has been uploaded into an asset table
+        """

-    def __new__(
-        cls,
-        asset_path,
-        asset_name: str,
-        file_name: str,
-        asset_metadata: dict[str, Any],
-        asset_types: list[str] | str,
-        asset_rid: Optional[RID] = None,
-    ):
+        def __init__(
+            self,
+            asset_path: str | Path,
+            asset_name: str,
+            file_name: str,
+            asset_metadata: dict[str, Any],
+            asset_types: list[str] | str,
+            asset_rid: Optional["RID"] = None,
+        ):
+            super().__init__(asset_path)
+            # These assignments happen after __new__ returns the instance
+            self.asset_name = asset_name
+            self.file_name = file_name
+            self.asset_metadata = asset_metadata
+            self.asset_types = (
+                asset_types if isinstance(asset_types, list) else [asset_types]
+            )
+            self.asset_rid = asset_rid
+else:
+
+    class AssetFilePath(type(Path())):
         """
         Create a new Path object that has additional information related to the use of this path as an asset.

@@ -99,15 +112,26 @@ class AssetFilePath(type(Path())):
             asset_types: A list of terms from the Asset_Type controlled vocabulary.
             asset_rid: The RID of the asset if it has been uploaded into an asset table
         """
-        obj = super().__new__(cls, asset_path)
-        obj.asset_types = (
-            asset_types if isinstance(asset_types, list) else [asset_types]
-        )
-        obj.asset_metadata = asset_metadata
-        obj.asset_name = asset_name
-        obj.file_name = file_name
-        obj.asset_rid = asset_rid
-        return obj
+
+        def __new__(
+            cls,
+            asset_path: str | Path,
+            asset_name: str,
+            file_name: str,
+            asset_metadata: dict[str, Any],
+            asset_types: list[str] | str,
+            asset_rid: Optional["RID"] = None,
+        ):
+            # Only pass the path to the base Path class
+            obj = super().__new__(cls, asset_path)
+            obj.asset_name = asset_name
+            obj.file_name = file_name
+            obj.asset_metadata = asset_metadata
+            obj.asset_types = (
+                asset_types if isinstance(asset_types, list) else [asset_types]
+            )
+            obj.asset_rid = asset_rid
+            return obj


 class Execution:
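Note: the version split above tracks a real pathlib change. Since Python 3.12, pathlib.Path is designed for subclassing and __init__ receives the path segments; before 3.12, Path.__new__ dispatches to PosixPath or WindowsPath, so a subclass must derive from type(Path()) (the concrete flavour) and attach extra attributes in __new__. A self-contained sketch of the same idiom (TaggedPath and tag are illustrative names, not part of deriva-ml):

    import sys
    from pathlib import Path

    if sys.version_info >= (3, 12):
        class TaggedPath(Path):
            def __init__(self, *args, tag: str = ""):
                super().__init__(*args)  # 3.12+: Path.__init__ consumes the segments
                self.tag = tag           # plain attribute assignment now works
    else:
        class TaggedPath(type(Path())):  # PosixPath or WindowsPath, per platform
            def __new__(cls, *args, tag: str = ""):
                obj = super().__new__(cls, *args)  # only path segments go to Path
                obj.tag = tag
                return obj

    p = TaggedPath("/tmp/example.txt", tag="demo")
    print(p.name, p.tag)  # -> example.txt demo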
@@ -154,7 +178,7 @@ class Execution:
         Args:
             configuration: Execution configuration object that describes the execution.
             ml_object: The DerivaML instance that created the execution.
-            reload: RID of previously initialized execution object.
+            reload: RID of a previously initialized execution object.
         """
         self.asset_paths: list[AssetFilePath] = []
         self.configuration = configuration
@@ -237,9 +261,9 @@ class Execution:

     def _save_runtime_environment(self):
         runtime_env_path = self.asset_file_path(
-            asset_name="Execution_Metadata",
-            file_name=f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
-            asset_types=ExecMetadataVocab.runtime_env.value,
+            "Execution_Metadata",
+            f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+            ExecMetadataType.runtime_env.value,
         )
         with open(runtime_env_path, "w") as fp:
             json.dump(get_execution_environment(), fp)
@@ -296,15 +320,19 @@ class Execution:
         # Save configuration details for later upload
         if not reload:
             cfile = self.asset_file_path(
-                asset_name=MLAsset.execution_metadata,
-                file_name="configuration.json",
-                asset_types=ExecMetadataVocab.execution_config.value,
+                MLAsset.execution_metadata,
+                "configuration.json",
+                ExecMetadataType.execution_config.value,
             )
             with open(cfile.as_posix(), "w", encoding="utf-8") as config_file:
                 json.dump(self.configuration.model_dump(), config_file)

             for parameter_file in self.configuration.parameters:
-                self.asset_file_path(MLAsset.execution_assets, parameter_file)
+                self.asset_file_path(
+                    MLAsset.execution_asset,
+                    parameter_file,
+                    ExecAssetType.input_file.value,
+                )

         # save runtime env
         self._save_runtime_environment()
@@ -471,7 +499,7 @@ class Execution:
         """Download an asset from a URL and place it in a local directory.

         Args:
-            asset_rid: URL of the asset.
+            asset_rid: RID of the asset.
             dest_dir: Destination directory for the asset.
             update_catalog: Whether to update the catalog execution information after downloading.

@@ -651,20 +679,9 @@ class Execution:
         with open(feature_file, "r") as feature_values:
             entities = [json.loads(line.strip()) for line in feature_values]
         # Update the asset columns in the feature and add to the catalog.
-        try:
-            self._ml_object.domain_path.tables[feature_table].insert(
-                [map_path(e) for e in entities]
-            )
-        except DataPathException as e:
-            if re.match(
-                rf'DETAIL: +Key +\("Execution", +"{target_table}", +"Feature_Name"\)=\(.*\) already exists',
-                e.message,
-            ):
-                self._logger.info(
-                    f"Skipping reload of feature values for {feature_table}"
-                )
-            else:
-                raise e
+        self._ml_object.domain_path.tables[feature_table].insert(
+            [map_path(e) for e in entities], on_conflict_skip=True
+        )

     def _update_asset_execution_table(
         self,
@@ -689,27 +706,17 @@ class Execution:
         asset_exe = self._model.find_association(asset_table_name, "Execution")
         asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]

-        try:
-            asset_exe_path.insert(
-                [
-                    {
-                        asset_table_name: asset_path.asset_rid,
-                        "Execution": self.execution_rid,
-                        "Asset_Role": asset_role,
-                    }
-                    for asset_path in asset_list
-                ]
-            )
-        except DataPathException as e:
-            if re.match(
-                rf'DETAIL: +Key +\("{asset_table_name}", +"Execution"\)=\(.*\) already exists',
-                e.message,
-            ):
-                self._logger.info(
-                    f"Skipping reload of execution assocations for {asset_table_name}"
-                )
-            else:
-                raise e
+        asset_exe_path.insert(
+            [
+                {
+                    asset_table_name: asset_path.asset_rid,
+                    "Execution": self.execution_rid,
+                    "Asset_Role": asset_role,
+                }
+                for asset_path in asset_list
+            ],
+            on_conflict_skip=True,
+        )

         # Now add in the type names via the asset_asset_type association table.
         # Get the list of types for each file in the asset.
@@ -735,24 +742,15 @@ class Execution:
         type_path = pb.schemas[asset_asset_type.schema.name].tables[
             asset_asset_type.name
         ]
-        try:
-            type_path.insert(
-                [
-                    {asset_table_name: asset.asset_rid, "Asset_Type": t}
-                    for asset in asset_list
-                    for t in asset_type_map[asset.file_name]
-                ]
-            )
-        except DataPathException as e:
-            if re.match(
-                rf'DETAIL: +Key +\("{asset_table_name}", +"Asset_Type"\)=\(.*\) already exists',
-                e.message,
-            ):
-                self._logger.info(
-                    f"Skipping reload of execution asset types for {asset_table_name}"
-                )
-            else:
-                raise e
+
+        type_path.insert(
+            [
+                {asset_table_name: asset.asset_rid, "Asset_Type": t}
+                for asset in asset_list
+                for t in asset_type_map[asset.file_name]
+            ],
+            on_conflict_skip=True,
+        )

     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def asset_file_path(
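Note: the three try/except blocks removed above all recognized duplicate-key failures by regex-matching the server's error text. They are replaced by the datapath insert option on_conflict_skip=True, which skips rows that violate an existing key instead of raising, so re-running an upload is idempotent. A hedged sketch of the new call shape (the catalog, schema, and table names below are made up for illustration):

    # `catalog` is assumed to be a connected deriva ErmrestCatalog.
    pb = catalog.getPathBuilder()
    assoc = pb.schemas["deriva-ml"].tables["Execution_Asset_Execution"]
    rows = [{"Execution_Asset": "1-ABC", "Execution": "1-XYZ", "Asset_Role": "Output"}]
    assoc.insert(rows, on_conflict_skip=True)  # duplicate rows are skipped, not fatal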
deriva_ml/execution_configuration.py CHANGED
@@ -264,7 +264,7 @@ class Workflow(BaseModel):
             checksum = os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"]
             is_notebook = True
         else:
-            path, is_notebook = Workflow._get_notebook_path()
+            path, is_notebook = Workflow._get_python_script()
         github_url, checksum = Workflow.get_url_and_checksum(path)

         return Workflow(
@@ -325,14 +325,15 @@ class ExecutionConfiguration(BaseModel):
             should be materialized.
         assets: List of assets to be downloaded prior to execution. The values must be RIDs in an asset table
         parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
-        workflow: A RID for a workflow instance. Must have a name, URI to the workflow instance, and a type.
+        workflow: Either a Workflow object, or a RID for a workflow instance.
+        parameters: Either a dictionary or a path to a JSON file that contains configuration parameters for the execution.
         description: A description of the execution. Can use Markdown format.
     """

     datasets: conlist(DatasetSpec) = []
     assets: list[RID | str] = []  # List of RIDs to model files.
     workflow: RID | Workflow
-    parameters: dict[str, Any] = {}
+    parameters: dict[str, Any] | Path = {}
     description: str = ""
     argv: conlist(str) = Field(default_factory=lambda: sys.argv)
@@ -341,7 +342,7 @@ class ExecutionConfiguration(BaseModel):
     @field_validator("parameters", mode="before")
     @classmethod
     def validate_parameters(cls, value: Any) -> Any:
-        """If parameter is a file, assume that it has JSON contents for configuration parameters"""
+        """If a parameter is a file, assume that it has JSON contents for configuration parameters"""
         if isinstance(value, str) or isinstance(value, Path):
             with open(value, "r") as f:
                 return json.load(f)
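Note: with parameters now typed dict[str, Any] | Path and validated in mode="before", a string or Path value is replaced by the parsed JSON before Pydantic checks the field, so callers can pass either form. A standalone sketch of the validator pattern (the Config model is illustrative, not the deriva-ml class):

    import json
    from pathlib import Path
    from typing import Any

    from pydantic import BaseModel, field_validator

    class Config(BaseModel):
        parameters: dict[str, Any] | Path = {}

        @field_validator("parameters", mode="before")
        @classmethod
        def load_parameter_file(cls, value: Any) -> Any:
            # A str or Path is treated as a JSON file holding the parameter dict.
            if isinstance(value, (str, Path)):
                with open(value, "r") as f:
                    return json.load(f)
            return value

    # Config(parameters="test-files/execution-parameters.json").parameters
    # -> {"local-file": "My local file.txt"}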
deriva_ml/run_notebook.py CHANGED
@@ -1,5 +1,6 @@
 """Module to run a notebook using papermill"""

+from datetime import datetime
 import json
 import os
 import papermill as pm
@@ -7,9 +8,9 @@ from pathlib import Path
 import regex as re
 import tempfile

-from deriva_ml import Workflow, DerivaML, MLVocab
+from deriva_ml import Workflow, DerivaML
 from deriva.core import BaseCLI
-from deriva_ml import MLAsset
+from deriva_ml import MLAsset, ExecAssetType


 class DerivaMLRunNotebookCLI(BaseCLI):
@@ -36,6 +37,19 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             help="Display parameters information for the given notebook path.",
         )

+        self.parser.add_argument(
+            "--log-output",
+            action="store_false",
+            help="Display logging output from notebook.",
+        )
+
+        self.parser.add_argument(
+            "--catalog",
+            metavar="<1>",
+            default=1,
+            help="Catalog number. Default 1",
+        )
+
         self.parser.add_argument(
             "--parameter",
             "-p",
@@ -43,7 +57,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             action="append",
             metavar=("KEY", "VALUE"),
             default=[],
-            help="Provide a parameter name band value to inject into the notebook.",
+            help="Provide a parameter name and value to inject into the notebook.",
         )

         self.parser.add_argument(
@@ -84,19 +98,24 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             print("Notebook file must be an ipynb file.")
             exit(1)

+        os.environ["DERIVA_HOST"] = args.host
+        os.environ["DERIVA_CATALOG_ID"] = args.catalog
+
         # Create a workflow instance for this specific version of the script. Return an existing workflow if one is found.
         notebook_parameters = pm.inspect_notebook(notebook_file)
         if args.inspect:
-            for param, value in notebook_parameters:
+            for param, value in notebook_parameters.items():
                 print(
                     f"{param}:{value['inferred_type_name']} (default {value['default']})"
                 )
             return
         else:
-            notebook_parameters = {
-                k: v["default"] for k, v in notebook_parameters.items()
-            } | parameters
-            print(f"Running notebook {notebook_file.name} with paremeters:")
+            notebook_parameters = (
+                {"host": args.host, "catalog": args.catalog}
+                | {k: v["default"] for k, v in notebook_parameters.items()}
+                | parameters
+            )
+            print(f"Running notebook {notebook_file.name} with parameters:")
             for param, value in notebook_parameters.items():
                 print(f"  {param}:{value}")
         self.run_notebook(notebook_file.resolve(), parameters, args.kernel)
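Note: parameter resolution now layers three dictionaries with the | merge operator, and the right-hand operand wins on key collisions: host/catalog seeds first, then the defaults papermill inspected from the notebook, then explicit -p overrides. For example:

    seed = {"host": "demo.derivacloud.org", "catalog": 1}  # hypothetical CLI values
    defaults = {"foo": 1, "bar": "hello"}                  # inspected notebook defaults
    overrides = {"foo": 42}                                # from repeated -p KEY VALUE
    merged = seed | defaults | overrides
    print(merged)  # {'host': 'demo.derivacloud.org', 'catalog': 1, 'foo': 42, 'bar': 'hello'}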
@@ -121,7 +140,7 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             r"Execution RID: https://(?P<host>.*)/id/(?P<catalog_id>.*)/(?P<execution_rid>[\w-]+)",
             line,
         ):
-            host = m["host"]
+            hostname = m["host"]
             catalog_id = m["catalog_id"]
             execution_rid = m["execution_rid"]
         if not execution_rid:
@@ -129,19 +148,24 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             exit(1)
         print("Uploaded notebook output for Execution RID:", execution_rid)

-        ml_instance = DerivaML(hostname=host, catalog_id=catalog_id)
-        ml_instance.add_term(
-            MLVocab.asset_type,
-            "Notebook_Output",
-            description="Jupyter Notebook Output",
-        )
+        ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id)
+
         execution = ml_instance.restore_execution(execution_rid)
         execution.asset_file_path(
             asset_name=MLAsset.execution_asset,
             file_name=notebook_output,
-            asset_types=["Notebook_Output"],
+            asset_types=ExecAssetType.notebook_output,
         )
+        parameter_file = execution.asset_file_path(
+            asset_name=MLAsset.execution_asset,
+            file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
+            asset_types=ExecAssetType.input_file.value,
+        )
+        with open(parameter_file, "w") as f:
+            json.dump(parameters, f)
+
         execution.upload_execution_outputs()
+        print(ml_instance.cite(execution_rid))


 def main():
deriva_ml/test-files/execution-parameters.json ADDED
@@ -0,0 +1 @@
+{"local-file": "My local file.txt"}
deriva_ml/test-files/notebook-parameters.json ADDED
@@ -0,0 +1,5 @@
+{
+  "assets": ["2-7J8M"],
+  "datasets": ["2-7K8W"],
+  "parameters": "test-files/execution-parameters.json"
+}
deriva_ml/test_notebook.ipynb CHANGED
@@ -3,15 +3,21 @@
   {
    "cell_type": "code",
    "id": "0",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:49.930351Z",
+     "start_time": "2025-04-18T22:52:48.926842Z"
+    }
+   },
    "source": [
     "import builtins\n",
+    "import os\n",
+    "\n",
     "from deriva.core.utils.globus_auth_utils import GlobusNativeLogin\n",
-    "from deriva_ml import ExecutionConfiguration, MLVocab, DerivaSystemColumns, DatasetSpec, DerivaML, Workflow\n",
-    "from deriva_ml.demo_catalog import create_demo_catalog, DemoML"
+    "from deriva_ml import ExecutionConfiguration, MLVocab, DerivaML, DatasetSpec"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": 1
   },
   {
    "cell_type": "code",
@@ -19,36 +25,61 @@
    "metadata": {
     "tags": [
      "parameters"
-    ]
+    ],
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:49.988873Z",
+     "start_time": "2025-04-18T22:52:49.986713Z"
+    }
    },
    "source": [
     "foo: int = 1\n",
-    "bar: str = \"hello\"\n",
-    "list_parameter: list[float] = [1, 2, 3]"
+    "assets = []\n",
+    "datasets = []\n",
+    "parameters = None"
    ],
    "outputs": [],
-   "execution_count": null
+   "execution_count": 2
  },
  {
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:50.002808Z",
+     "start_time": "2025-04-18T22:52:49.999450Z"
+    }
+   },
    "cell_type": "code",
-   "outputs": [],
-   "execution_count": null,
    "source": [
-    "print('foo', foo)\n",
-    "print('bar', bar)\n",
-    "print('list_parameter', list_parameter)"
+    "print(\"foo\", foo)\n",
+    "print(\"assets\", assets)\n",
+    "print(\"datasets\", datasets)\n",
+    "print(\"parameters\", parameters)"
    ],
-   "id": "70b23cdd933ce669"
+   "id": "70b23cdd933ce669",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "foo 1\n",
+      "assets []\n",
+      "datasets []\n",
+      "parameters None\n"
+     ]
+    }
+   ],
+   "execution_count": 3
  },
  {
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-04-18T22:52:50.344660Z",
+     "start_time": "2025-04-18T22:52:50.013816Z"
+    }
+   },
    "cell_type": "code",
-   "outputs": [],
-   "execution_count": null,
    "source": [
-    "hostname = 'dev.eye-ai.org'\n",
-    "domain_schema = 'eye-ai'\n",
+    "hostname = os.environ.get(\"DERIVA_HOST\") #or \"dev.eye-ai.org\"\n",
+    "catalog_id = os.environ.get(\"DERIVA_CATALOG_ID\") #or 'eye-ai'\n",
     "\n",
     "gnl = GlobusNativeLogin(host=hostname)\n",
     "if gnl.is_logged_in([hostname]):\n",
@@ -57,15 +88,31 @@
     "    gnl.login([hostname], no_local_server=True, no_browser=True, refresh_tokens=True, update_bdbag_keychain=True)\n",
     "    print(\"Login Successful\")\n"
    ],
-   "id": "2"
+   "id": "2",
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'NoneType' object has no attribute 'lower'",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
+      "Cell \u001B[0;32mIn[4], line 5\u001B[0m\n\u001B[1;32m 2\u001B[0m catalog_id \u001B[38;5;241m=\u001B[39m os\u001B[38;5;241m.\u001B[39menviron\u001B[38;5;241m.\u001B[39mget(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mDERIVA_CATALOG_ID\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;66;03m#or 'eye-ai'\u001B[39;00m\n\u001B[1;32m 4\u001B[0m gnl \u001B[38;5;241m=\u001B[39m GlobusNativeLogin(host\u001B[38;5;241m=\u001B[39mhostname)\n\u001B[0;32m----> 5\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[43mgnl\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mis_logged_in\u001B[49m\u001B[43m(\u001B[49m\u001B[43m[\u001B[49m\u001B[43mhostname\u001B[49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m:\n\u001B[1;32m 6\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mYou are already logged in.\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n",
      "File \u001B[0;32m~/opt/anaconda3/envs/deriva-test/lib/python3.10/site-packages/deriva/core/utils/globus_auth_utils.py:582\u001B[0m, in \u001B[0;36mGlobusNativeLogin.is_logged_in\u001B[0;34m(self, hosts, requested_scopes, hosts_to_scope_map, exclude_defaults)\u001B[0m\n\u001B[1;32m 576\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[38;5;21mis_logged_in\u001B[39m(\u001B[38;5;28mself\u001B[39m,\n\u001B[1;32m 577\u001B[0m hosts\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 578\u001B[0m requested_scopes\u001B[38;5;241m=\u001B[39m(),\n\u001B[1;32m 579\u001B[0m hosts_to_scope_map\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 580\u001B[0m exclude_defaults\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[1;32m 581\u001B[0m scopes \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mset\u001B[39m(requested_scopes)\n\u001B[0;32m--> 582\u001B[0m scope_map \u001B[38;5;241m=\u001B[39m hosts_to_scope_map \u001B[38;5;28;01mif\u001B[39;00m hosts_to_scope_map \u001B[38;5;28;01melse\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mhosts_to_scope_map\u001B[49m\u001B[43m(\u001B[49m\u001B[43mhosts\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;129;43;01mor\u001B[39;49;00m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mhosts\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 583\u001B[0m scopes\u001B[38;5;241m.\u001B[39mupdate(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mscope_set_from_scope_map(scope_map))\n\u001B[1;32m 584\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m exclude_defaults:\n",
      "File \u001B[0;32m~/opt/anaconda3/envs/deriva-test/lib/python3.10/site-packages/deriva/core/utils/globus_auth_utils.py:607\u001B[0m, in \u001B[0;36mGlobusNativeLogin.hosts_to_scope_map\u001B[0;34m(self, hosts, match_scope_tag, all_tagged_scopes, force_refresh, warn_on_discovery_failure)\u001B[0m\n\u001B[1;32m 605\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m host \u001B[38;5;129;01min\u001B[39;00m hosts:\n\u001B[1;32m 606\u001B[0m scope_map\u001B[38;5;241m.\u001B[39mupdate({host: []})\n\u001B[0;32m--> 607\u001B[0m scopes \u001B[38;5;241m=\u001B[39m \u001B[43mget_oauth_scopes_for_host\u001B[49m\u001B[43m(\u001B[49m\u001B[43mhost\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 608\u001B[0m \u001B[43m \u001B[49m\u001B[43mconfig_file\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mconfig_file\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 609\u001B[0m \u001B[43m \u001B[49m\u001B[43mforce_refresh\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mforce_refresh\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 610\u001B[0m \u001B[43m \u001B[49m\u001B[43mwarn_on_discovery_failure\u001B[49m\u001B[38;5;241;43m=\u001B[39;49m\u001B[43mwarn_on_discovery_failure\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 611\u001B[0m scope_list \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mlist\u001B[39m()\n\u001B[1;32m 612\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m scopes:\n",
      "File \u001B[0;32m~/opt/anaconda3/envs/deriva-test/lib/python3.10/site-packages/deriva/core/utils/core_utils.py:300\u001B[0m, in \u001B[0;36mget_oauth_scopes_for_host\u001B[0;34m(host, config_file, force_refresh, warn_on_discovery_failure)\u001B[0m\n\u001B[1;32m 298\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m required_scopes:\n\u001B[1;32m 299\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m hostname, scopes \u001B[38;5;129;01min\u001B[39;00m required_scopes\u001B[38;5;241m.\u001B[39mitems():\n\u001B[0;32m--> 300\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[43mhost\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mlower\u001B[49m() \u001B[38;5;241m==\u001B[39m hostname\u001B[38;5;241m.\u001B[39mlower():\n\u001B[1;32m 301\u001B[0m result \u001B[38;5;241m=\u001B[39m scopes\n\u001B[1;32m 302\u001B[0m \u001B[38;5;28;01mbreak\u001B[39;00m\n",
      "\u001B[0;31mAttributeError\u001B[0m: 'NoneType' object has no attribute 'lower'"
+     ]
+    }
+   ],
+   "execution_count": 4
  },
  {
   "cell_type": "code",
   "id": "3",
   "metadata": {},
   "source": [
-    "ml_instance = DemoML(hostname, domain_schema)\n",
-    "print(f'Creating catalog at {ml_instance.catalog_id}')\n",
+    "ml_instance = DerivaML(hostname, catalog_id)\n",
     "\n",
     "ml_instance.add_term(MLVocab.workflow_type, \"Manual Workflow\", description=\"Initial setup of Model File\")\n",
     "ml_instance.add_term(MLVocab.asset_type, \"API_Model\", description=\"Model for our API workflow\")"
@@ -92,12 +139,38 @@
   "id": "6",
   "metadata": {},
   "source": [
-    "manual_execution = ml_instance.create_execution(ExecutionConfiguration( description=\"Sample Execution\", workflow=api_workflow))\n",
-    "manual_execution.upload_execution_outputs()\n",
-    "# Now lets create model configuration for our program."
+    "manual_execution = ml_instance.create_execution(\n",
+    "    ExecutionConfiguration(\n",
+    "        description=\"Sample Execution\",\n",
+    "        workflow=api_workflow,\n",
+    "        datasets=[DatasetSpec(rid=ds, version=ml_instance.dataset_version(ds)) for ds in datasets],\n",
+    "        assets=assets,\n",
+    "        parameters=parameters\n",
+    "    )\n",
+    ")"
   ],
   "outputs": [],
   "execution_count": null
+ },
+ {
+  "metadata": {},
+  "cell_type": "code",
+  "source": [
+   "print(f'parameters: {manual_execution.parameters}')\n",
+   "print(f'datasets: {manual_execution.datasets}')\n",
+   "print(f'assets: {manual_execution.asset_paths}')"
+  ],
+  "id": "4b2a3b8c16333645",
+  "outputs": [],
+  "execution_count": null
+ },
+ {
+  "metadata": {},
+  "cell_type": "code",
+  "source": "manual_execution.upload_execution_outputs()",
+  "id": "efa8cb1b0ed438bb",
+  "outputs": [],
+  "execution_count": null
  }
 ],
 "metadata": {
deriva_ml-1.13.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.13.0
+Version: 1.13.2
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
 Requires-Python: >=3.10
deriva_ml-1.13.2.dist-info/RECORD CHANGED
@@ -1,29 +1,31 @@
-deriva_ml/__init__.py,sha256=2sRcX2s72Guo4M7IGW_0_ZyKokZNCbVE6de65tvHBlw,1109
+deriva_ml/__init__.py,sha256=GfneBq7xDphMqUQY96sW9ixRj74M3UTUCmD4KMIRSaM,1101
 deriva_ml/database_model.py,sha256=lMbAEqn4n0m7h_JstMX_LX9gbvBIEydG3sRilPn3eLU,14885
-deriva_ml/dataset.py,sha256=OyWUKWnYeP0ctimSBQ4em-uJrzCNOohx4GPT2uIl6R4,60649
+deriva_ml/dataset.py,sha256=W1TSHgkdXNw2v5hC0UBrivCKadMK1LaFd6YIjHE9jZA,60786
 deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
 deriva_ml/dataset_bag.py,sha256=yS8oYVshfFtRDyhGPRqtbvxjyd3ZFF29lrB783OP4vM,11849
 deriva_ml/demo_catalog.py,sha256=9Qo3JD4bUIwnL3ngPctc2QBeWApvMR_5UyaK9ockTrY,11536
-deriva_ml/deriva_definitions.py,sha256=MZl3c23gArbS-0HZ24VDAyb8HI2Kcb8hFdhSnBLOLfo,9030
-deriva_ml/deriva_ml_base.py,sha256=JYTG_a8SURhrPQBTz6OaGMk0D0sSPWpXqCnoVnSNViI,38501
+deriva_ml/deriva_definitions.py,sha256=avdOgxtB60yb8XsWm-AYtCdvg2QkQbyfkZuA9xx9t2U,9221
+deriva_ml/deriva_ml_base.py,sha256=FYSTQl4mNePC8IxC70rS5D0VmLNPccfFkkiVneDxJpY,38678
 deriva_ml/deriva_model.py,sha256=wytGCAHutiUaRfnRKr80Ks_P6ci0_wXRU3vq3lthfYU,13260
-deriva_ml/execution.py,sha256=t20sGqPRcUaG-5LLHPaQ01pPP8XpqiCveS1h-Fw_XbQ,38093
-deriva_ml/execution_configuration.py,sha256=WiA4PPijNZUftExN6Qm1YScVD1OY3depNKTutIwOfUg,14063
+deriva_ml/execution.py,sha256=otMkdjF15SEWg99mvWrTpnKz7-BWp9b8XbFf6iwfmtg,37697
+deriva_ml/execution_configuration.py,sha256=7fiIbtzz9nmkxA9-GTiN6Ln2twfaOLivwJwGZb8gAL0,14163
 deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
 deriva_ml/feature.py,sha256=07g0uSrhumdopJluWuWSRMrzagaikAOihqB09bzXBP4,5475
 deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
-deriva_ml/run_notebook.py,sha256=XzI38WNsu9CKDYbWMt8b5ODtlp27dsWsSuMkKwfeWOE,5484
+deriva_ml/run_notebook.py,sha256=vhmij4P1Va52MIj8hOc-WmjLRp3sTmK6p7LXCWrzejc,6308
 deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
-deriva_ml/test_notebook.ipynb,sha256=CatQIh9whsmYWGpwuyw9XMggQ9-TlCueTyH3Wiv4aBc,3116
+deriva_ml/test_notebook.ipynb,sha256=_5D6rkSGbmENPJZbDgfZ6-yt94BNEwxytVUDmG3RE3w,10166
 deriva_ml/upload.py,sha256=gHTGXAVlf56EwNzmw5zY0gbBf8h08eU2q2GBbb2FdVc,16087
 deriva_ml/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deriva_ml/schema_setup/annotations.py,sha256=v0gTpmWYxRqsQ-bcnQzsr8WowGv2pi9pZUsO3WWnu1U,9528
 deriva_ml/schema_setup/create_schema.py,sha256=hNMc-v5tferd0UjfdB6nBw7Rc_o-Mg6NkPqQGie9YOw,11700
 deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
 deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
-deriva_ml-1.13.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deriva_ml-1.13.0.dist-info/METADATA,sha256=YxPB1VnpB-Y8KL4Yp3VKAYq7F5EUp-R7MfZ1uhWpRZs,999
-deriva_ml-1.13.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-deriva_ml-1.13.0.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
-deriva_ml-1.13.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
-deriva_ml-1.13.0.dist-info/RECORD,,
+deriva_ml/test-files/execution-parameters.json,sha256=1vBqXlaMa0cysonE20TweVDfTGRdSi9CUuAkW1xiYNo,36
+deriva_ml/test-files/notebook-parameters.json,sha256=7uEE2sLQSrSc9cEGQ_RKE7t5dwkEYv0qLo5mRbzo8Og,108
+deriva_ml-1.13.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deriva_ml-1.13.2.dist-info/METADATA,sha256=uuvCztFgxOwWM34egjr65pW8-2pYGCtV_xofT5TmcLg,999
+deriva_ml-1.13.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+deriva_ml-1.13.2.dist-info/entry_points.txt,sha256=cJnALMa6pjdk6RQCt4HFbKHqALpVa0k6wPeQDPedLJI,295
+deriva_ml-1.13.2.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+deriva_ml-1.13.2.dist-info/RECORD,,
deriva_ml-1.13.2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (79.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any