deriva-ml 1.10.1__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/database_model.py +3 -2
- deriva_ml/dataset.py +7 -16
- deriva_ml/dataset_bag.py +10 -3
- deriva_ml/demo_catalog.py +84 -78
- deriva_ml/deriva_definitions.py +2 -2
- deriva_ml/deriva_ml_base.py +105 -132
- deriva_ml/deriva_model.py +31 -0
- deriva_ml/execution.py +422 -315
- deriva_ml/execution_configuration.py +4 -0
- deriva_ml/feature.py +1 -2
- deriva_ml/schema_setup/create_schema.py +223 -183
- deriva_ml/upload.py +99 -236
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/METADATA +3 -1
- deriva_ml-1.12.0.dist-info/RECORD +27 -0
- deriva_ml-1.10.1.dist-info/RECORD +0 -27
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/WHEEL +0 -0
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.10.1.dist-info → deriva_ml-1.12.0.dist-info}/top_level.txt +0 -0
deriva_ml/execution.py
CHANGED
@@ -5,45 +5,37 @@ This module defined the Execution class which is used to interact with the state
 from __future__ import annotations
 
 from collections import defaultdict
-import csv
 import json
 import logging
 import os
 import shutil
 from datetime import datetime
 from pathlib import Path
-import requests
-from tempfile import NamedTemporaryFile
 from typing import Iterable, Any, Optional
+
 from deriva.core import format_exception
-from deriva.core.ermrest_model import Table
 from pydantic import validate_call, ConfigDict
 import sys
+from deriva.core.hatrac_store import HatracStore
 
-from .deriva_definitions import
-from .deriva_definitions import (
-    RID,
-    Status,
-    FileUploadState,
-    UploadState,
-    DerivaMLException,
-)
+from .deriva_definitions import ExecMetadataVocab
+from .deriva_definitions import RID, Status, FileUploadState, DerivaMLException, MLVocab
 from .deriva_ml_base import DerivaML, FeatureRecord
 from .dataset_aux_classes import DatasetSpec, DatasetVersion, VersionPart
 from .dataset_bag import DatasetBag
 from .execution_configuration import ExecutionConfiguration, Workflow
 from .execution_environment import get_execution_environment
 from .upload import (
-    execution_metadata_dir,
-    execution_asset_dir,
     execution_root,
     feature_root,
-
+    asset_root,
     feature_value_path,
     is_feature_dir,
-    is_feature_asset_dir,
     table_path,
     upload_directory,
+    normalize_asset_dir,
+    asset_file_path,
+    asset_type_path,
 )
 
 try:
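Taken together, the import changes summarize the release: `csv`, `requests`, and `NamedTemporaryFile` drop out, `HatracStore` arrives for direct object-store transfers, and the `.upload` helpers move to an asset-centric set (`asset_root`, `asset_file_path`, `asset_type_path`, `normalize_asset_dir`). A small sketch of the vocabulary enums the new code leans on; only members that actually appear in this diff are shown, and the import path follows the package layout above:

```python
# Sketch: vocabulary enums referenced throughout the new execution.py.
from deriva_ml.deriva_definitions import MLVocab, ExecMetadataVocab

print(MLVocab.asset_type, MLVocab.asset_role)       # asset controlled vocabularies
print(ExecMetadataVocab.runtime_env.value)          # tags environment snapshots
print(ExecMetadataVocab.execution_config.value)     # tags saved configurations
```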
@@ -60,6 +52,51 @@ except ImportError:
         return []
 
 
+class AssetFilePath(type(Path())):
+    """Derived class of Path that also includes information about a downloaded.
+
+    An AssetFilePath has all the methods associated with a pathlib.Path object. In addition, it defines additional
+    attributes associated with a DerviaML asset.
+
+    Attributes:
+        asset_types: A list of the types associated with this asset. From the Asset_Type controlled vocabulary.
+        asset_metadata: A dictionary of names and values of any additional columns associated with this asset.
+        asset_name: The name of the asset table
+        file_name: The name of the file in the local file system that has the asset contents
+        asset_rid: The RID of the asset if it has been uploaded into an asset table
+    """
+
+    def __new__(
+        cls,
+        asset_path,
+        asset_name: str,
+        file_name: str,
+        asset_metadata: dict[str, Any],
+        asset_types: list[str] | str,
+        asset_rid: Optional[RID] = None,
+    ):
+        """
+        Create a new Path object that has additional information related to the use of this path as an asset.
+
+        Args:
+            asset_path: Local path to the location of the asset.
+            asset_name: The name of the asset in the catalog (e.g. the asset table name).
+            file_name: Name of the local file that contains the contents of the asset.
+            asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
+            asset_types: A list of terms from the Asset_Type controlled vocabulary.
+            asset_rid: The RID of the asset if it has been uploaded into an asset table
+        """
+        obj = super().__new__(cls, asset_path)
+        obj.asset_types = (
+            asset_types if isinstance(asset_types, list) else [asset_types]
+        )
+        obj.asset_metadata = asset_metadata
+        obj.asset_name = asset_name
+        obj.file_name = file_name
+        obj.asset_rid = asset_rid
+        return obj
+
+
 class Execution:
     """The Execution class is used to capture the context of an activity within DerivaML. While these are primarily
     computational, manual processes can be represented by an execution as well.
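`AssetFilePath` subclasses `type(Path())` rather than `Path` itself because `pathlib.Path` construction dispatches to the platform-specific `PosixPath`/`WindowsPath`; subclassing the concrete type keeps every `Path` method working on the instance. A standalone sketch of the same pattern, with illustrative names that are not part of deriva-ml:

```python
import sys
from pathlib import Path

class TaggedPath(type(Path())):
    """Illustrative only: a Path subclass that carries one extra attribute."""

    def __new__(cls, path, tag=""):
        obj = super().__new__(cls, path)
        obj.tag = tag  # extra metadata rides along with the path
        return obj

    if sys.version_info >= (3, 12):
        # Python 3.12 reworked pathlib construction: PurePath.__init__(*args)
        # now parses the path segments, so keep the extra argument away from it.
        def __init__(self, path, tag=""):
            super().__init__(path)

p = TaggedPath("/tmp/example.txt", tag="demo")
print(p.suffix, p.tag)  # ".txt demo": Path behavior plus the extra attribute
```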
@@ -102,18 +139,19 @@ class Execution:
         """
 
         Args:
-            configuration:
-            ml_object:
+            configuration: Execution configuration object that describes the execution.
+            ml_object: The DerivaML instance that created the execution.
             reload: RID of previously initialized execution object.
         """
-        self.asset_paths: list[
+        self.asset_paths: list[AssetFilePath] = []
         self.configuration = configuration
         self._ml_object = ml_object
+        self._model = ml_object.model
         self._logger = ml_object._logger
         self.start_time = None
         self.stop_time = None
         self.status = Status.created
-        self.uploaded_assets: list[
+        self.uploaded_assets: Optional[dict[str, list[AssetFilePath]]] = None
         self.configuration.argv = sys.argv
 
         self.dataset_rids: list[RID] = []
@@ -124,6 +162,7 @@ class Execution:
         self._cache_dir = self._ml_object.cache_dir
         self._dry_run = dry_run
 
+        # Make sure we have a good workflow.
         if isinstance(self.configuration.workflow, Workflow):
             self.workflow_rid = (
                 self._ml_object.add_workflow(self.configuration.workflow)
@@ -140,6 +179,7 @@ class Execution:
                 "Workflow specified in execution configuration is not a Workflow"
             )
 
+        # Validate the datasets and assets to be valid.
         for d in self.configuration.datasets:
             if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
                 raise DerivaMLException(
@@ -147,9 +187,7 @@ class Execution:
             )
 
         for a in self.configuration.assets:
-            if not self._ml_object.
-                self._ml_object.resolve_rid(a).table.name
-            ):
+            if not self._model.is_asset(self._ml_object.resolve_rid(a).table.name):
                 raise DerivaMLException(
                     "Asset specified in execution configuration is not a asset table"
                 )
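Input validation now goes through the cached `self._model` handle instead of repeated attribute chains. A hedged interactive sketch of the same checks (the import path, constructor arguments, and RID are placeholder assumptions; `resolve_rid` and `is_asset` are the calls used above):

```python
from deriva_ml import DerivaML  # assumed top-level export

ml = DerivaML("example.org", "1")   # assumed: host name and catalog id
rid = "1-ABC4"                      # placeholder RID from an execution configuration

table = ml.resolve_rid(rid).table   # which table does this RID belong to?
if table.name != "Dataset" and not ml.model.is_asset(table.name):
    print(f"{rid} is neither a dataset nor an asset, so it cannot be an execution input")
```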
@@ -176,15 +214,12 @@ class Execution:
         self._initialize_execution(reload)
 
     def _save_runtime_environment(self):
-        runtime_env_path =
-
-
-
-
-
-            suffix=".txt",
-            delete=False,
-        ) as fp:
+        runtime_env_path = self.asset_file_path(
+            asset_name="Execution_Metadata",
+            file_name=f"environment_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
+            asset_types=ExecMetadataVocab.runtime_env.value,
+        )
+        with open(runtime_env_path, "w") as fp:
             json.dump(get_execution_environment(), fp)
 
     def _initialize_execution(self, reload: Optional[RID] = None) -> None:
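`_save_runtime_environment` shows the idiom this release adopts everywhere: instead of writing to a `NamedTemporaryFile` and hoping the uploader finds it, code asks `asset_file_path()` for a registered location inside the execution's upload tree and writes there directly. A hedged sketch of the same pattern for a user file (assumes an initialized `Execution` named `execution`; the `Asset_Type` term is a placeholder that must exist in your catalog's vocabulary):

```python
import json

# Register a new file under the Execution_Metadata asset table and get its path.
report_path = execution.asset_file_path(
    asset_name="Execution_Metadata",
    file_name="training_report.json",
    asset_types="Execution Config",   # placeholder vocabulary term
)

# Write through the returned AssetFilePath like any pathlib.Path.
with open(report_path, "w", encoding="utf-8") as fp:
    json.dump({"loss": 0.12, "epochs": 10}, fp)
# upload_execution_outputs() will pick the file up automatically.
```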
@@ -206,6 +241,7 @@ class Execution:
             )
             self.datasets.append(self.download_dataset_bag(dataset))
             self.dataset_rids.append(dataset.rid)
+
         # Update execution info
         schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
         if self.dataset_rids and not (reload or self._dry_run):
@@ -218,17 +254,30 @@ class Execution:
 
         # Download assets....
         self.update_status(Status.running, "Downloading assets ...")
-        self.asset_paths =
-
-
-
-
-
+        self.asset_paths = {}
+        for asset_rid in self.configuration.assets:
+            asset_table = self._ml_object.resolve_rid(asset_rid).table.name
+            dest_dir = (
+                execution_root(self._ml_object.working_dir, self.execution_rid)
+                / "downloaded-assets"
+                / asset_table
+            )
+            dest_dir.mkdir(parents=True, exist_ok=True)
+            self.asset_paths.setdefault(asset_table, []).append(
+                self.download_asset(
+                    asset_rid=asset_rid,
+                    dest_dir=dest_dir,
+                    update_catalog=not (reload or self._dry_run),
+                )
+            )
 
         # Save configuration details for later upload
-
-
-
+        cfile = self.asset_file_path(
+            asset_name="Execution_Metadata",
+            file_name="configuration.json",
+            asset_types=ExecMetadataVocab.execution_config.value,
+        )
+        with open(cfile.as_posix(), "w", encoding="utf-8") as config_file:
             json.dump(self.configuration.model_dump(), config_file)
 
         # save runtime env
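`asset_paths` changes shape in this release: it was a flat list and is now a dictionary grouping each downloaded `AssetFilePath` under its asset-table name, mirroring the on-disk layout `<working_dir>/<execution_rid>/downloaded-assets/<table>/<file>`. A plain-Python sketch of the resulting structure (all values are placeholders):

```python
from pathlib import Path

asset_paths: dict[str, list[Path]] = {}
root = Path("/work/2-1B2C/downloaded-assets")   # hypothetical execution root

for table, fname in [("Image", "scan1.png"), ("Image", "scan2.png"), ("Model", "weights.pt")]:
    asset_paths.setdefault(table, []).append(root / table / fname)

print(sorted(asset_paths))         # ['Image', 'Model']
print(len(asset_paths["Image"]))   # 2
```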
@@ -237,6 +286,42 @@ class Execution:
         self.start_time = datetime.now()
         self.update_status(Status.pending, "Initialize status finished.")
 
+    @property
+    def _execution_root(self) -> Path:
+        """
+
+        Args:
+
+        Returns:
+          :return:
+
+        """
+        return execution_root(self._working_dir, self.execution_rid)
+
+    @property
+    def _feature_root(self) -> Path:
+        """The root path to all execution specific files.
+        :return:
+
+        Args:
+
+        Returns:
+
+        """
+        return feature_root(self._working_dir, self.execution_rid)
+
+    @property
+    def _asset_root(self) -> Path:
+        """The root path to all execution specific files.
+        :return:
+
+        Args:
+
+        Returns:
+
+        """
+        return asset_root(self._working_dir, self.execution_rid)
+
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def download_dataset_bag(self, dataset: DatasetSpec) -> DatasetBag:
         """Given a RID to a dataset_table, or a MINID to an existing bag, download the bag file, extract it and validate
@@ -276,27 +361,6 @@ class Execution:
             ]
         )
 
-    def _create_notebook_checkpoint(self):
-        """Trigger a checkpoint creation using Jupyter's API."""
-
-        server, session = self._ml_object._get_notebook_session()
-        notebook_name = session["notebook"]["path"]
-        notebook_url = f"{server['url']}api/contents/{notebook_name}"
-
-        # Get notebook content
-        response = requests.get(
-            notebook_url, headers={"Authorization": f"Token {server['token']}"}
-        )
-        if response.status_code == 200:
-            notebook_content = response.json()["content"]
-            # Execution metadata cannot be in a directory, so map path into filename.
-            checkpoint_path = (
-                self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
-                / f"{notebook_name.replace('/', '_')}.checkpoint"
-            )
-            with open(checkpoint_path, "w", encoding="utf-8") as f:
-                json.dump(notebook_content, f)
-
     def execution_start(self) -> None:
         """Start an execution, uploading status to catalog"""
@@ -318,7 +382,7 @@ class Execution:
             self._ml_object.ml_schema
         ].Execution.update([{"RID": self.execution_rid, "Duration": duration}])
 
-    def _upload_execution_dirs(self) -> dict[str,
+    def _upload_execution_dirs(self) -> dict[str, list[AssetFilePath]]:
         """Upload execution assets at _working_dir/Execution_asset.
 
         This routine uploads the contents of the
@@ -332,86 +396,142 @@ class Execution:
             DerivaMLException: If there is an issue uploading the assets.
         """
 
-        def asset_name(p: str) -> str:
-            return Path(*Path(p).parts[-2:]).as_posix()
-
         try:
             self.update_status(Status.running, "Uploading execution files...")
-            results = upload_directory(self.
-
-
-            execution_assets = [
-                r.result["RID"]
-                for r in results.values()
-                if r.state == UploadState.success and "Execution_Asset_Type" in r.result
-            ]
-            execution_metadata = [
-                r.result["RID"]
-                for r in results.values()
-                if r.state == UploadState.success
-                and "Execution_Metadata_Type" in r.result
-            ]
-            self._update_execution_asset_table(execution_assets)
-            self._update_execution_metadata_table(execution_metadata)
-
-        except Exception as e:
+            results = upload_directory(self._model, self._asset_root)
+        except RuntimeError as e:
             error = format_exception(e)
             self.update_status(Status.failed, error)
             raise DerivaMLException(f"Fail to upload execution_assets. Error: {error}")
 
+        asset_map = {}
+        for path, status in results.items():
+            asset_table, file_name = normalize_asset_dir(path)
+
+            asset_map.setdefault(asset_table, []).append(
+                AssetFilePath(
+                    asset_path=path,
+                    asset_name=asset_table,
+                    file_name=file_name,
+                    asset_metadata={
+                        k: v
+                        for k, v in status.result.items()
+                        if k in self._model.asset_metadata(asset_table.split("/")[1])
+                    },
+                    asset_types=[],
+                    asset_rid=status.result["RID"],
+                )
+            )
+
+        self._update_asset_execution_table(asset_map)
         self.update_status(Status.running, "Updating features...")
 
-
-
-
-
-
-
-
-
-        Returns:
-
-        """
-            entries = list(directory.iterdir())
-            for entry in entries:
-                if entry.is_dir():
-                    yield from traverse_bottom_up(entry)
-            yield directory
-
-        for p in traverse_bottom_up(self._feature_root):
-            if m := is_feature_asset_dir(p):
-                try:
-                    self.update_status(
-                        Status.running, f"Uploading feature {m['feature_name']}..."
-                    )
-                    feature_assets[m["target_table"], m["feature_name"]] = (
-                        self._ml_object.upload_assets(p)
-                    )
-                    results |= feature_assets[m["target_table"], m["feature_name"]]
-                except Exception as e:
-                    error = format_exception(e)
-                    self.update_status(Status.failed, error)
-                    raise DerivaMLException(
-                        f"Fail to upload execution metadata. Error: {error}"
-                    )
-            elif m := is_feature_dir(p):
-                files = [f for f in p.iterdir() if f.is_file()]
-                if files:
-                    self._update_feature_table(
-                        target_table=m["target_table"],
-                        feature_name=m["feature_name"],
-                        feature_file=files[0],
-                        uploaded_files=feature_assets[
-                            m["target_table"], m["feature_name"]
-                        ],
-                    )
+        for p in self._feature_root.glob("**/*.jsonl"):
+            m = is_feature_dir(p.parent)
+            self._update_feature_table(
+                target_table=m["target_table"],
+                feature_name=m["feature_name"],
+                feature_file=p,
+                uploaded_files=asset_map,
+            )
 
         self.update_status(Status.running, "Upload assets complete")
-        return
+        return asset_map
+
+    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
+    def download_asset(
+        self, asset_rid: RID, dest_dir: Path, update_catalog=True
+    ) -> AssetFilePath:
+        """Download an asset from a URL and place it in a local directory.
+
+        Args:
+            asset_rid: URL of the asset.
+            dest_dir: Destination directory for the asset.
+            update_catalog: Whether to update the catalog execution information after downloading.
+
+        Returns:
+            A tuple with the name of the asset table and a Path object to the downloaded asset.
+        """
+
+        asset_table = self._ml_object.resolve_rid(asset_rid).table
+        if not self._model.is_asset(asset_table):
+            raise DerivaMLException(f"RID {asset_rid} is not for an asset table.")
+
+        asset_record = self._ml_object.retrieve_rid(asset_rid)
+        asset_metadata = {
+            k: v
+            for k, v in asset_record.items()
+            if k in self._model.asset_metadata(asset_table)
+        }
+        asset_url = asset_record["URL"]
+        asset_filename = dest_dir / asset_record["Filename"]
+        hs = HatracStore("https", self._ml_object.host_name, self._ml_object.credential)
+        hs.get_obj(path=asset_url, destfilename=asset_filename.as_posix())
+
+        asset_type_table = self._model.find_association(asset_table, MLVocab.asset_type)
+        type_path = self._ml_object.pathBuilder.schemas[
+            asset_type_table.schema.name
+        ].tables[asset_type_table.name]
+        asset_types = [
+            asset_type[MLVocab.asset_type.value]
+            for asset_type in type_path.filter(
+                type_path.columns[asset_table.name] == asset_rid
+            )
+            .attributes(type_path.Asset_Type)
+            .fetch()
+        ]
+
+        asset_path = AssetFilePath(
+            file_name=asset_filename,
+            asset_rid=asset_rid,
+            asset_path=asset_filename,
+            asset_metadata=asset_metadata,
+            asset_name=asset_table.name,
+            asset_types=asset_types,
+        )
+
+        if update_catalog:
+            self._update_asset_execution_table(
+                {f"{asset_table.schema.name}/{asset_table.name}": [asset_path]},
+                asset_role="Input",
+            )
+        return asset_path
+
+    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
+    def upload_assets(
+        self,
+        assets_dir: str | Path,
+    ) -> dict[Any, FileUploadState] | None:
+        """Upload assets from a directory.
+
+        This routine assumes that the current upload specification includes a configuration for the specified directory.
+        Every asset in the specified directory is uploaded
+
+        Args:
+            assets_dir: Directory containing the assets to upload.
+
+        Returns:
+            Results of the upload operation.
+
+        Raises:
+            DerivaMLException: If there is an issue uploading the assets.
+        """
+
+        def path_to_asset(path: str) -> str:
+            """Pull the asset name out of a path to that asset in the filesystem"""
+            components = path.split("/")
+            return components[
+                components.index("asset") + 2
+            ]  # Look for asset in the path to find the name
+
+        if not self._model.is_asset(Path(assets_dir).name):
+            raise DerivaMLException("Directory does not have name of an asset table.")
+        results = upload_directory(self._model, assets_dir)
+        return {path_to_asset(p): r for p, r in results.items()}
 
     def upload_execution_outputs(
         self, clean_folder: bool = True
-    ) -> dict[str,
+    ) -> dict[str, list[AssetFilePath]]:
         """Upload all the assets and metadata associated with the current execution.
 
         This will include any new assets, features, or table values.
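The new `download_asset`/`upload_assets` pair makes the `Execution` object the single crossing point between the local working directory and the catalog, with each transfer recorded against the execution RID through the `Asset_Role` vocabulary ("Input" or "Output"). A hedged end-to-end sketch (assumes an initialized `Execution` named `execution`; RIDs and the `Model` table name are placeholders):

```python
from pathlib import Path

# Pull one input asset down by RID; the association table is updated
# with Asset_Role "Input" unless update_catalog=False.
dest = Path("/tmp/inputs")
dest.mkdir(parents=True, exist_ok=True)
local_copy = execution.download_asset(asset_rid="1-ABC2", dest_dir=dest)
print(local_copy.asset_name, local_copy.asset_types, local_copy.asset_rid)

# Push a directory of outputs back up.  The directory name must match
# an asset table; "Model" here is a placeholder domain table.
states = execution.upload_assets("/tmp/outputs/Model")
for name, state in states.items():
    print(name, state)
```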
@@ -427,29 +547,16 @@ class Execution:
         if self._dry_run:
             return {}
         try:
-            uploaded_assets = self._upload_execution_dirs()
+            self.uploaded_assets = self._upload_execution_dirs()
             self.update_status(Status.completed, "Successfully end the execution.")
             if clean_folder:
                 self._clean_folder_contents(self._execution_root)
-            return uploaded_assets
+            return self.uploaded_assets
         except Exception as e:
             error = format_exception(e)
             self.update_status(Status.failed, error)
             raise e
 
-    def _asset_dir(self) -> Path:
-        """
-
-        Args:
-
-        Returns:
-          :return: PathLib path object to model directory.
-
-        """
-        path = self._working_dir / self.execution_rid / "asset"
-        path.mkdir(parents=True, exist_ok=True)
-        return path
-
     def _clean_folder_contents(self, folder_path: Path):
         """
@@ -472,7 +579,7 @@ class Execution:
         target_table: str,
         feature_name: str,
         feature_file: str | Path,
-        uploaded_files: dict[str,
+        uploaded_files: dict[str, list[AssetFilePath]],
     ) -> None:
         """
@@ -480,121 +587,140 @@ class Execution:
             target_table: str:
             feature_name: str:
             feature_file: str | Path:
-            uploaded_files:
+            uploaded_files: Dictionary whose key ia an asset name, file-name pair, and whose value is a filename, RID of that asset.
         """
 
+        # Get the column names of all the Feature columns that should be the RID of an asset
         asset_columns = [
             c.name
             for c in self._ml_object.feature_record_class(
                 target_table, feature_name
             ).feature.asset_columns
         ]
+
+        # Get the names of the columns in the feature that are assets.
+        asset_columns = [
+            c.name
+            for c in self._ml_object.feature_record_class(
+                target_table, feature_name
+            ).feature.asset_columns
+        ]
+
         feature_table = self._ml_object.feature_record_class(
             target_table, feature_name
         ).feature.feature_table.name
+        asset_map = {
+            (asset_table, asset.file_name): asset.asset_rid
+            for asset_table, assets in uploaded_files.items()
+            for asset in assets
+        }
 
         def map_path(e):
-            """
-
-            Args:
-              e:
-
-            Returns:
-
-            """
-            # Go through the asset columns and replace the file name with the RID for the uploaded file.
+            """Go through the asset columns and replace the file name with the RID for the uploaded file."""
             for c in asset_columns:
-                e[c] = asset_map[e[c]]
+                e[c] = asset_map[normalize_asset_dir(e[c])]
             return e
 
-        #
-        asset_map = {
-            file: asset.result["RID"]
-            for file, asset in uploaded_files.items()
-            if asset.state == UploadState.success and asset.result
-        }
+        # Load the JSON file that has the set of records that contain the feature values.
         with open(feature_file, "r") as feature_values:
-            entities = [
-
-
-
-        """Upload execution metadata at _working_dir/Execution_metadata."""
-        ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
-        entities = [
-            {"Execution_Metadata": metadata_rid, "Execution": self.execution_rid}
-            for metadata_rid in assets
-        ]
-        ml_schema_path.Execution_Metadata_Execution.insert(entities)
-
-    def _update_execution_asset_table(self, assets: list[RID]) -> None:
-        """Assets associated with an execution must be linked to an execution entity after they are uploaded into
-        the catalog. This routine takes a list of uploaded assets and makes that association.
-
-        Args:
-          assets: list of RIDS for execution assets.:
-        """
-        ml_schema_path = self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema]
-        entities = [
-            {"Execution_Asset": asset_rid, "Execution": self.execution_rid}
-            for asset_rid in assets
-        ]
-        ml_schema_path.Execution_Asset_Execution.insert(entities)
-
-    @property
-    def _execution_metadata_dir(self) -> Path:
-        """
-
-        Args:
-
-        Returns:
-          to the catalog by the execution_upload method in an execution object.
-
-        :return:
-
-        """
-        return execution_metadata_dir(
-            self._working_dir, exec_rid=self.execution_rid, metadata_type=""
+            entities = [json.loads(line.strip()) for line in feature_values]
+        # Update the asset columns in the feature and add to the catalog.
+        self._ml_object.domain_path.tables[feature_table].insert(
+            [map_path(e) for e in entities]
         )
 
-    def
-
-
-
+    def _update_asset_execution_table(
+        self,
+        uploaded_assets: dict[str, list[AssetFilePath]],
+        asset_role: str = "Output",
+    ):
+        """Add entry to association table connecting an asset to an execution RID
 
         Args:
-
-
-
-
-
-            self._ml_object.lookup_term(
-
-
-
-
-
-
-
-
-
+            uploaded_assets: Dictionary whose key is the name of an asset table, and whose value is a list of RIDs for
+                newly added assets to that table.
+            asset_role: A term or list of terms from the Asset_Role vocabulary.
+        """
+        # Make sure the asset role is in the controlled vocabulary table.
+        self._ml_object.lookup_term(MLVocab.asset_role, asset_role)
+
+        pb = self._ml_object.pathBuilder
+        for asset_table, asset_list in uploaded_assets.items():
+            asset_table_name = asset_table.split("/")[
+                1
+            ]  # Peel off the schema from the asset table
+            asset_exe = self._model.find_association(asset_table_name, "Execution")
+            asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
+            asset_exe_path.insert(
+                [
+                    {
+                        asset_table_name: asset_path.asset_rid,
+                        "Execution": self.execution_rid,
+                        "Asset_Role": asset_role,
+                    }
+                    for asset_path in asset_list
+                ]
+            )
 
-
+            # Now add in the type names via the asset_asset_type association table.
+            # Get the list of types for each file in the asset.
+            if asset_role == "Input":
+                return
+            asset_type_map = {}
+            with open(
+                asset_type_path(
+                    self._working_dir,
+                    self.execution_rid,
+                    self._model.name_to_table(asset_table_name),
+                ),
+                "r",
+            ) as f:
+                for line in f:
+                    asset_type_map.update(json.loads(line.strip()))
+            for asset_path in asset_list:
+                asset_path.asset_types = asset_type_map[asset_path.file_name]
+
+            asset_asset_type = self._model.find_association(
+                asset_table_name, "Asset_Type"
+            )
+            type_path = pb.schemas[asset_asset_type.schema.name].tables[
+                asset_asset_type.name
+            ]
+            type_path.insert(
+                [
+                    {asset_table_name: asset.asset_rid, "Asset_Type": t}
+                    for asset in asset_list
+                    for t in asset_type_map[asset.file_name]
+                ]
+            )
 
-
-
+    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
+    def asset_file_path(
+        self,
+        asset_name: str,
+        file_name: str,
+        asset_types: Optional[list[str] | str] = None,
+        copy_file=False,
+        **kwargs,
+    ) -> AssetFilePath:
+        """Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
 
-
-
-
-        )
+        Given the name of an asset table, and a file name, register the file for upload, and return a path to that
+        file in the upload directory. In addition to the filename, additioal asset metadata and file asset types may
+        be specified.
 
-
-
+        This routine has three modes, depending on if file_name refers to an existing file. If it doesn't, a path
+        to a new file with the specified name is returned. The caller can then open that file for writing.
 
-
+        If the provided filename refers to an existing file and the copy_file argument is False (the default), then the
+        returned path contains a symbolic link to that file. If the copy_file argument is True then the contents of
+        file_name are copied into the target directory.
 
         Args:
-
+            asset_name: Type of asset to be uploaded. Must be a term in Asset_Type controlled vocabulary.
+            file_name: Name of file to be uploaded.
+            asset_types: Type of asset to be uploaded. Defaults to name of the asset.
+            **kwargs: Any additional metadata values that may be part of the asset table.
 
         Returns:
             Path in which to place asset files.
@@ -602,73 +728,46 @@ class Execution:
         Raises:
             DerivaException: If the asset type is not defined.
         """
-        self.
-
-        return execution_asset_dir(
-            self._working_dir, exec_rid=self.execution_rid, asset_type=asset_type
-        )
-
-    @property
-    def _execution_root(self) -> Path:
-        """
-
-        Args:
-
-        Returns:
-          :return:
-
-        """
-        return execution_root(self._working_dir, self.execution_rid)
-
-    @property
-    def _feature_root(self) -> Path:
-        """The root path to all execution specific files.
-        :return:
-
-        Args:
-
-        Returns:
-
-        """
-        return feature_root(self._working_dir, self.execution_rid)
-
-    def feature_paths(
-        self, table: Table | str, feature_name: str
-    ) -> tuple[Path, dict[str, Path]]:
-        """Return the file path of where to place feature values, and assets for the named feature and table.
+        if not self._model.is_asset(asset_name):
+            DerivaMLException(f"Table {asset_name} is not an asset")
 
-
-        will be created
+        asset_table = self._model.name_to_table(asset_name)
 
-
-
-
-
-        Returns:
-            A tuple whose first element is the path for the feature values and whose second element is a dictionary
-            of associated asset table names and corresponding paths.
-        """
-        feature = self._ml_object.lookup_feature(table, feature_name)
+        asset_types = asset_types or kwargs.get("Asset_Type", None) or asset_name
+        asset_types = [asset_types] if isinstance(asset_types, str) else asset_types
+        for t in asset_types:
+            self._ml_object.lookup_term(MLVocab.asset_type, t)
 
-
+        file_name = Path(file_name)
+        asset_path = asset_file_path(
             self._working_dir,
-
-
-
-
+            self.execution_rid,
+            self._model.name_to_table(asset_name),
+            file_name.name,
+            metadata=kwargs,
+        )
+
+        if file_name.exists():
+            if copy_file:
+                asset_path.write_bytes(file_name.read_bytes())
+            else:
+                asset_path.symlink_to(file_name)
+
+        # Persist the asset types into a file
+        with open(
+            asset_type_path(self._working_dir, self.execution_rid, asset_table),
+            "a",
+            encoding="utf-8",
+        ) as f:
+            f.write(json.dumps({file_name.name: asset_types}) + "\n")
+
+        return AssetFilePath(
+            asset_path=asset_path,
+            asset_name=asset_name,
+            file_name=file_name.name,
+            asset_metadata=kwargs,
+            asset_types=asset_types,
        )
-        asset_paths = {
-            asset_table.name: feature_asset_dir(
-                self._working_dir,
-                exec_rid=self.execution_rid,
-                schema=self._ml_object.domain_schema,
-                target_table=feature.target_table.name,
-                feature_name=feature_name,
-                asset_table=asset_table.name,
-            )
-            for asset_table in feature.asset_columns
-        }
-        return tpath, asset_paths
 
     def table_path(self, table: str) -> Path:
         """Return a local file path to a CSV to add values to a table on upload.
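`asset_file_path` is the user-facing half of the upload redesign: it registers a file under the execution's upload tree and appends a JSON-lines sidecar (via `asset_type_path`) mapping each file name to its `Asset_Type` terms, which `_update_asset_execution_table` replays after upload, one `{file_name: [types]}` object per line. One reading note: the `is_asset` guard above constructs a `DerivaMLException` without raising it, so an invalid table name slips through; that looks like a bug rather than intent. A hedged sketch of the three modes (assumes an `Execution` named `execution`; table and vocabulary names are placeholders):

```python
from pathlib import Path

# Mode 1: file does not exist yet -- get a path and write to it.
p = execution.asset_file_path("Execution_Metadata", "notes.txt",
                              asset_types="Execution Config")  # placeholder term
p.write_text("free-form notes about this run\n")

# Mode 2: file already exists -- default is a symlink into the upload tree.
src = Path("/tmp/weights.pt")
linked = execution.asset_file_path("Model", src.as_posix())    # placeholder table

# Mode 3: same, but copy the bytes instead of linking.
copied = execution.asset_file_path("Model", src.as_posix(), copy_file=True)
```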
@@ -679,10 +778,7 @@ class Execution:
         Returns:
             Pathlib path to the file in which to place table values.
         """
-        if (
-            table
-            not in self._ml_object.model.schemas[self._ml_object.domain_schema].tables
-        ):
+        if table not in self._model.schemas[self._ml_object.domain_schema].tables:
             raise DerivaMLException(
                 "Table '{}' not found in domain schema".format(table)
             )
@@ -693,10 +789,11 @@ class Execution:
 
     def execute(self) -> Execution:
         """Initiate an execution with provided configuration. Can be used in a context manager."""
+        self.execution_start()
         return self
 
     @validate_call
-    def
+    def add_features(self, features: Iterable[FeatureRecord]) -> None:
         """Given a collection of Feature records, write out a CSV file in the appropriate assets directory so that this
         feature gets uploaded when the execution is complete.
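`execute()` previously just returned `self`; it now also calls `execution_start()`, so the context-manager form actually marks the execution as started in the catalog. A hedged usage sketch (assumes `ml` is a connected `DerivaML` instance and `config` a valid `ExecutionConfiguration`; the constructor keywords follow this diff, and `Execution` is built directly here although deriva-ml may provide a factory helper; the hunk does not show `__enter__`/`__exit__`, only the docstring's claim of context-manager support):

```python
from deriva_ml.execution import Execution

execution = Execution(configuration=config, ml_object=ml)  # signature per this diff
with execution.execute() as exe:
    # ... run the model, writing outputs via exe.asset_file_path(...) ...
    pass
exe.upload_execution_outputs()
```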
@@ -704,22 +801,28 @@ class Execution:
             features: Iterable of Feature records to write.
         """
 
-
-
+        # Make sure feature list is homogeneous:
+        sorted_features = defaultdict(list)
+        for f in features:
+            sorted_features[type(f)].append(f)
+        for fs in sorted_features.values():
+            self._add_features(fs)
+
+    def _add_features(self, features: list[FeatureRecord]) -> None:
+        # Update feature records to include current execution_rid
+        first_row = features[0]
         feature = first_row.feature
-
-
+        json_path = feature_value_path(
+            self._working_dir,
+            schema=self._ml_object.domain_schema,
+            target_table=feature.target_table.name,
+            feature_name=feature.feature_name,
+            exec_rid=self.execution_rid,
         )
-
-
-
-
-        with open(csv_path, "w") as f:
-            writer = csv.DictWriter(f, fieldnames=fieldnames)
-            writer.writeheader()
-            writer.writerow(first_row.model_dump())
-            for feature in feature_iter:
-                writer.writerow(feature.model_dump())
+        with open(json_path, "a", encoding="utf-8") as file:
+            for feature in features:
+                feature.Execution = self.execution_rid
+                file.write(json.dumps(feature.model_dump(mode="json")) + "\n")
 
     @validate_call
     def create_dataset(self, dataset_types: str | list[str], description: str) -> RID:
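Feature values now accumulate in an append-mode JSON-lines file instead of a one-shot CSV, which is what lets `add_features` be called repeatedly during a run and lets `_update_feature_table` later rewrite asset file names into RIDs line by line. A hedged sketch (assumes a `Subject` table with a `Quality` feature in the domain schema; the field names are hypothetical and come from your catalog):

```python
# feature_record_class builds a record class for the named feature.
QualityFeature = ml.feature_record_class("Subject", "Quality")  # placeholder names

records = [
    QualityFeature(Subject=rid, Quality_Value="good")   # hypothetical fields
    for rid in ["2-0001", "2-0002"]                     # placeholder RIDs
]
execution.add_features(records)   # appended as JSON lines, uploaded at the end
```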
@@ -748,9 +851,13 @@ class Execution:
         Add new elements to an existing dataset. In addition to adding new members, the minor version number of the
         dataset is incremented and the description, if provide is applied to that new version.
 
+        The RIDs in the list to not have to be all from the same table, but they must be from a table that has
+        been configured to be a dataset element type.
+
         Args:
             dataset_rid: RID of dataset_table to extend or None if new dataset_table is to be created.
-            members: List of RIDs of members to add to the dataset_table.
+            members: List of RIDs of members to add to the dataset_table. RID must be to a table type that is a
+                dataset element type (see DerivaML.add_dataset_element_type).
             validate: Check rid_list to make sure elements are not already in the dataset_table.
             description: Markdown description of the updated dataset.
         """
|