deriva-ml 1.11.0__py3-none-any.whl → 1.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/database_model.py +3 -2
- deriva_ml/dataset.py +6 -15
- deriva_ml/dataset_bag.py +1 -1
- deriva_ml/deriva_ml_base.py +21 -16
- deriva_ml/deriva_model.py +8 -2
- deriva_ml/execution.py +44 -13
- deriva_ml/execution_configuration.py +4 -0
- deriva_ml/upload.py +7 -6
- {deriva_ml-1.11.0.dist-info → deriva_ml-1.12.1.dist-info}/METADATA +2 -1
- {deriva_ml-1.11.0.dist-info → deriva_ml-1.12.1.dist-info}/RECORD +14 -14
- {deriva_ml-1.11.0.dist-info → deriva_ml-1.12.1.dist-info}/WHEEL +0 -0
- {deriva_ml-1.11.0.dist-info → deriva_ml-1.12.1.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.11.0.dist-info → deriva_ml-1.12.1.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.11.0.dist-info → deriva_ml-1.12.1.dist-info}/top_level.txt +0 -0
deriva_ml/database_model.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Ths module contains the definition of the DatabaseModel class. The role of this class is to provide an
|
|
1
|
+
"""This module contains the definition of the DatabaseModel class. The role of this class is to provide an interface between the BDBag representation
|
|
2
2
|
of a dataset and a SQLite database in which the contents of the bag are stored.
|
|
3
3
|
"""
|
|
4
4
|
|
|
@@ -51,7 +51,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
|
|
|
51
51
|
appear in more than one database. To help manage this, a global list of all the datasets that have been loaded
|
|
52
52
|
into DatabaseModels, is kept in the class variable `_rid_map`.
|
|
53
53
|
|
|
54
|
-
Because you can load
|
|
54
|
+
Because you can load different versions of a dataset simultaneously, the dataset RID and version number are tracked, and a new
|
|
55
55
|
SQLite instance is created for every new dataset version present.
|
|
56
56
|
|
|
57
57
|
Attributes:
|
|
@@ -290,6 +290,7 @@ class DatabaseModel(DerivaModel, metaclass=DatabaseModelMeta):
|
|
|
290
290
|
return DatasetBag(self, dataset_rid or self.dataset_rid)
|
|
291
291
|
|
|
292
292
|
def dataset_version(self, dataset_rid: Optional[RID] = None) -> DatasetVersion:
|
|
293
|
+
"""Return the version of the specified dataset."""
|
|
293
294
|
if dataset_rid and dataset_rid not in self.bag_rids:
|
|
294
295
|
DerivaMLException(f"Dataset RID {dataset_rid} is not in model.")
|
|
295
296
|
return self.bag_rids[dataset_rid]
|
deriva_ml/dataset.py
CHANGED
|
@@ -232,12 +232,10 @@ class Dataset:
|
|
|
232
232
|
"""Increment the version of the specified dataset_table.
|
|
233
233
|
|
|
234
234
|
Args:
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
description: Description of the version update of the dataset_table.
|
|
240
|
-
execution_rid: Which execution is performing increment.
|
|
235
|
+
dataset_rid: RID of the dataset whose version is to be incremented.
|
|
236
|
+
component: Which component of the dataset_table version to increment: Major, Minor or Patch.
|
|
237
|
+
description: Description of the version update of the dataset_table.
|
|
238
|
+
execution_rid: Which execution is performing increment.
|
|
241
239
|
|
|
242
240
|
Returns:
|
|
243
241
|
new semantic version of the dataset_table as a 3-tuple
|
|
@@ -275,9 +273,6 @@ class Dataset:
|
|
|
275
273
|
description: Description of the dataset_table.
|
|
276
274
|
execution_rid: Execution under which the dataset_table will be created.
|
|
277
275
|
version: Version of the dataset_table.
|
|
278
|
-
type: str | list[str]:
|
|
279
|
-
description: str:
|
|
280
|
-
|
|
281
276
|
|
|
282
277
|
Returns:
|
|
283
278
|
New dataset_table RID.
|
|
@@ -349,7 +344,6 @@ class Dataset:
|
|
|
349
344
|
Args:
|
|
350
345
|
dataset_rid: RID of the dataset_table to delete.
|
|
351
346
|
recurse: If True, delete the dataset_table along with any nested datasets. (Default value = False)
|
|
352
|
-
dataset_rid: RID:
|
|
353
347
|
"""
|
|
354
348
|
# Get association table entries for this dataset_table
|
|
355
349
|
# Delete association table entries
|
|
@@ -397,7 +391,7 @@ class Dataset:
|
|
|
397
391
|
filtered_path = dataset_path
|
|
398
392
|
else:
|
|
399
393
|
filtered_path = dataset_path.filter(
|
|
400
|
-
(dataset_path.Deleted == False) | (dataset_path.Deleted == None)
|
|
394
|
+
(dataset_path.Deleted == False) | (dataset_path.Deleted == None) # noqa: E712
|
|
401
395
|
)
|
|
402
396
|
|
|
403
397
|
# Get a list of all the dataset_type values associated with this dataset_table.
|
|
@@ -439,8 +433,7 @@ class Dataset:
|
|
|
439
433
|
routine makes it possible to add objects from the specified table to a dataset_table.
|
|
440
434
|
|
|
441
435
|
Args:
|
|
442
|
-
element: Name
|
|
443
|
-
element: str | Table:
|
|
436
|
+
element: Name of the table or table object that is to be added to the dataset_table.
|
|
444
437
|
|
|
445
438
|
Returns:
|
|
446
439
|
The table object that was added to the dataset_table.
|
|
@@ -464,7 +457,6 @@ class Dataset:
|
|
|
464
457
|
|
|
465
458
|
Args:
|
|
466
459
|
dataset_rid: param recurse: If this is a nested dataset_table, list the members of the contained datasets
|
|
467
|
-
dataset_rid: RID:
|
|
468
460
|
recurse: (Default value = False)
|
|
469
461
|
limit: If provided, the maximum number of members to return for each element type.
|
|
470
462
|
|
|
@@ -677,7 +669,6 @@ class Dataset:
|
|
|
677
669
|
|
|
678
670
|
Args:
|
|
679
671
|
dataset_rid: return: RID of the parent dataset_table.
|
|
680
|
-
dataset_rid: RID:
|
|
681
672
|
|
|
682
673
|
Returns:
|
|
683
674
|
RID of the parent dataset_table.
|
deriva_ml/dataset_bag.py
CHANGED
|
@@ -168,7 +168,7 @@ class DatasetBag:
|
|
|
168
168
|
yield dict(zip(col_names, row))
|
|
169
169
|
|
|
170
170
|
@validate_call
|
|
171
|
-
def list_dataset_members(self, recurse: bool = False) -> dict[str, dict[str,
|
|
171
|
+
def list_dataset_members(self, recurse: bool = False) -> dict[str, dict[str, list]]:
|
|
172
172
|
"""Return a list of entities associated with a specific _dataset_table.
|
|
173
173
|
|
|
174
174
|
Args:
|
deriva_ml/deriva_ml_base.py
CHANGED
|
@@ -193,11 +193,7 @@ class DerivaML(Dataset):
|
|
|
193
193
|
pass
|
|
194
194
|
|
|
195
195
|
def _check_nbstrip_status(self) -> None:
|
|
196
|
-
"""
|
|
197
|
-
|
|
198
|
-
Returns:
|
|
199
|
-
A Path to the notebook file that is currently being executed.
|
|
200
|
-
"""
|
|
196
|
+
"""Check to see if nbstripout is installed."""
|
|
201
197
|
try:
|
|
202
198
|
if subprocess.run(
|
|
203
199
|
["nbstripout", "--is-installed"],
|
|
@@ -265,10 +261,13 @@ class DerivaML(Dataset):
|
|
|
265
261
|
is_notebook = True
|
|
266
262
|
else:
|
|
267
263
|
stack = inspect.stack()
|
|
264
|
+
# Get the caller's filename, which is two up the stack from here.
|
|
268
265
|
if len(stack) > 1:
|
|
269
|
-
filename = Path(
|
|
270
|
-
|
|
271
|
-
|
|
266
|
+
filename = Path(stack[2].filename)
|
|
267
|
+
if not filename.exists():
|
|
268
|
+
# Being called from the command-line interpreter (REPL), so there is no source file.
|
|
269
|
+
filename = "REPL"
|
|
270
|
+
# Get the caller's filename, which is two up the stack from here.
|
|
272
271
|
else:
|
|
273
272
|
raise DerivaMLException(
|
|
274
273
|
"Looking for caller failed"
|
|
@@ -326,7 +325,6 @@ class DerivaML(Dataset):
|
|
|
326
325
|
"""Return a local file path in which to place a CSV to add values to a table on upload.
|
|
327
326
|
|
|
328
327
|
Args:
|
|
329
|
-
table: return:
|
|
330
328
|
table: str | Table:
|
|
331
329
|
|
|
332
330
|
Returns:
|
|
@@ -1143,13 +1141,17 @@ class DerivaML(Dataset):
|
|
|
1143
1141
|
if self._is_notebook
|
|
1144
1142
|
else f"git hash-object {self.executable_path}"
|
|
1145
1143
|
)
|
|
1146
|
-
checksum =
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1144
|
+
checksum = (
|
|
1145
|
+
subprocess.run(
|
|
1146
|
+
cmd,
|
|
1147
|
+
capture_output=True,
|
|
1148
|
+
text=True,
|
|
1149
|
+
check=False,
|
|
1150
|
+
shell=True,
|
|
1151
|
+
).stdout.strip()
|
|
1152
|
+
if self.executable_path != "REPL"
|
|
1153
|
+
else "1"
|
|
1154
|
+
)
|
|
1153
1155
|
|
|
1154
1156
|
return Workflow(
|
|
1155
1157
|
name=name,
|
|
@@ -1172,6 +1174,8 @@ class DerivaML(Dataset):
|
|
|
1172
1174
|
"""
|
|
1173
1175
|
|
|
1174
1176
|
# Get repo URL from local gitHub repo.
|
|
1177
|
+
if self.executable_path == "REPL":
|
|
1178
|
+
return "REPL", True
|
|
1175
1179
|
try:
|
|
1176
1180
|
result = subprocess.run(
|
|
1177
1181
|
["git", "remote", "get-url", "origin"],
|
|
@@ -1240,6 +1244,7 @@ class DerivaML(Dataset):
|
|
|
1240
1244
|
# @validate_call
|
|
1241
1245
|
def restore_execution(self, execution_rid: Optional[RID] = None) -> "Execution":
|
|
1242
1246
|
"""Return an Execution object for a previously started execution with the specified RID."""
|
|
1247
|
+
|
|
1243
1248
|
from .execution import Execution
|
|
1244
1249
|
|
|
1245
1250
|
# Find path to execution
|
deriva_ml/deriva_model.py
CHANGED
|
@@ -27,6 +27,8 @@ from typing import Iterable, Optional
|
|
|
27
27
|
class DerivaModel:
|
|
28
28
|
"""Augmented interface to deriva model class.
|
|
29
29
|
|
|
30
|
+
This class provides a number of DerivaML specific methods that augment the interface in the deriva model class.
|
|
31
|
+
|
|
30
32
|
Attributes:
|
|
31
33
|
domain_schema: Schema name for domain specific tables and relationships.
|
|
32
34
|
model: ERMRest model for the catalog.
|
|
@@ -71,6 +73,10 @@ class DerivaModel:
|
|
|
71
73
|
# No domain schema defined.
|
|
72
74
|
self.domain_schema = domain_schema
|
|
73
75
|
|
|
76
|
+
def __getattr__(self, name):
|
|
77
|
+
# Called only if `name` is not found on DerivaModel itself. Delegate the attribute lookup to the underlying model object.
|
|
78
|
+
return getattr(self.model, name)
|
|
79
|
+
|
|
74
80
|
def name_to_table(self, table: str | Table) -> Table:
|
|
75
81
|
"""Return the table object corresponding to the given table name.
|
|
76
82
|
|
|
@@ -129,7 +135,7 @@ class DerivaModel:
|
|
|
129
135
|
def find_association(self, table1: Table | str, table2: Table | str) -> Table:
|
|
130
136
|
"""Given two tables, return an association table that connects the two.
|
|
131
137
|
|
|
132
|
-
Raises
|
|
138
|
+
Raises:
|
|
133
139
|
DerivaML exception if there is either not an association table or more than one association table.
|
|
134
140
|
"""
|
|
135
141
|
table1 = self.name_to_table(table1)
|
|
@@ -138,7 +144,7 @@ class DerivaModel:
|
|
|
138
144
|
tables = [
|
|
139
145
|
a.table
|
|
140
146
|
for a in table1.find_associations(pure=False)
|
|
141
|
-
if
|
|
147
|
+
if a.other_fkeys.pop().pk_table == table2
|
|
142
148
|
]
|
|
143
149
|
if len(tables) == 1:
|
|
144
150
|
return tables[0]
|
deriva_ml/execution.py
CHANGED
|
@@ -66,7 +66,6 @@ class AssetFilePath(type(Path())):
|
|
|
66
66
|
asset_rid: The RID of the asset if it has been uploaded into an asset table
|
|
67
67
|
"""
|
|
68
68
|
|
|
69
|
-
|
|
70
69
|
def __new__(
|
|
71
70
|
cls,
|
|
72
71
|
asset_path,
|
|
@@ -76,6 +75,17 @@ class AssetFilePath(type(Path())):
|
|
|
76
75
|
asset_types: list[str] | str,
|
|
77
76
|
asset_rid: Optional[RID] = None,
|
|
78
77
|
):
|
|
78
|
+
"""
|
|
79
|
+
Create a new Path object that has additional information related to the use of this path as an asset.
|
|
80
|
+
|
|
81
|
+
Args:
|
|
82
|
+
asset_path: Local path to the location of the asset.
|
|
83
|
+
asset_name: The name of the asset in the catalog (e.g. the asset table name).
|
|
84
|
+
file_name: Name of the local file that contains the contents of the asset.
|
|
85
|
+
asset_metadata: Any additional columns associated with this asset beyond the URL, Length, and checksum.
|
|
86
|
+
asset_types: A list of terms from the Asset_Type controlled vocabulary.
|
|
87
|
+
asset_rid: The RID of the asset if it has been uploaded into an asset table
|
|
88
|
+
"""
|
|
79
89
|
obj = super().__new__(cls, asset_path)
|
|
80
90
|
obj.asset_types = (
|
|
81
91
|
asset_types if isinstance(asset_types, list) else [asset_types]
|
|
@@ -133,7 +143,7 @@ class Execution:
|
|
|
133
143
|
ml_object: The DerivaML instance that created the execution.
|
|
134
144
|
reload: RID of previously initialized execution object.
|
|
135
145
|
"""
|
|
136
|
-
self.asset_paths: list[
|
|
146
|
+
self.asset_paths: list[AssetFilePath] = []
|
|
137
147
|
self.configuration = configuration
|
|
138
148
|
self._ml_object = ml_object
|
|
139
149
|
self._model = ml_object.model
|
|
@@ -141,7 +151,7 @@ class Execution:
|
|
|
141
151
|
self.start_time = None
|
|
142
152
|
self.stop_time = None
|
|
143
153
|
self.status = Status.created
|
|
144
|
-
self.uploaded_assets: list[
|
|
154
|
+
self.uploaded_assets: Optional[dict[str, list[AssetFilePath]]] = None
|
|
145
155
|
self.configuration.argv = sys.argv
|
|
146
156
|
|
|
147
157
|
self.dataset_rids: list[RID] = []
|
|
@@ -152,6 +162,7 @@ class Execution:
|
|
|
152
162
|
self._cache_dir = self._ml_object.cache_dir
|
|
153
163
|
self._dry_run = dry_run
|
|
154
164
|
|
|
165
|
+
# Make sure we have a good workflow.
|
|
155
166
|
if isinstance(self.configuration.workflow, Workflow):
|
|
156
167
|
self.workflow_rid = (
|
|
157
168
|
self._ml_object.add_workflow(self.configuration.workflow)
|
|
@@ -168,6 +179,7 @@ class Execution:
|
|
|
168
179
|
"Workflow specified in execution configuration is not a Workflow"
|
|
169
180
|
)
|
|
170
181
|
|
|
182
|
+
# Check that the datasets and assets named in the configuration are valid.
|
|
171
183
|
for d in self.configuration.datasets:
|
|
172
184
|
if self._ml_object.resolve_rid(d.rid).table.name != "Dataset":
|
|
173
185
|
raise DerivaMLException(
|
|
@@ -265,7 +277,7 @@ class Execution:
|
|
|
265
277
|
file_name="configuration.json",
|
|
266
278
|
asset_types=ExecMetadataVocab.execution_config.value,
|
|
267
279
|
)
|
|
268
|
-
with open(cfile, "w", encoding="utf-8") as config_file:
|
|
280
|
+
with open(cfile.as_posix(), "w", encoding="utf-8") as config_file:
|
|
269
281
|
json.dump(self.configuration.model_dump(), config_file)
|
|
270
282
|
|
|
271
283
|
# save runtime env
|
|
@@ -387,7 +399,7 @@ class Execution:
|
|
|
387
399
|
try:
|
|
388
400
|
self.update_status(Status.running, "Uploading execution files...")
|
|
389
401
|
results = upload_directory(self._model, self._asset_root)
|
|
390
|
-
except
|
|
402
|
+
except RuntimeError as e:
|
|
391
403
|
error = format_exception(e)
|
|
392
404
|
self.update_status(Status.failed, error)
|
|
393
405
|
raise DerivaMLException(f"Fail to upload execution_assets. Error: {error}")
|
|
@@ -519,7 +531,7 @@ class Execution:
|
|
|
519
531
|
|
|
520
532
|
def upload_execution_outputs(
|
|
521
533
|
self, clean_folder: bool = True
|
|
522
|
-
) -> dict[str, AssetFilePath]:
|
|
534
|
+
) -> dict[str, list[AssetFilePath]]:
|
|
523
535
|
"""Upload all the assets and metadata associated with the current execution.
|
|
524
536
|
|
|
525
537
|
This will include any new assets, features, or table values.
|
|
@@ -535,11 +547,11 @@ class Execution:
|
|
|
535
547
|
if self._dry_run:
|
|
536
548
|
return {}
|
|
537
549
|
try:
|
|
538
|
-
uploaded_assets = self._upload_execution_dirs()
|
|
550
|
+
self.uploaded_assets = self._upload_execution_dirs()
|
|
539
551
|
self.update_status(Status.completed, "Successfully end the execution.")
|
|
540
552
|
if clean_folder:
|
|
541
553
|
self._clean_folder_contents(self._execution_root)
|
|
542
|
-
return uploaded_assets
|
|
554
|
+
return self.uploaded_assets
|
|
543
555
|
except Exception as e:
|
|
544
556
|
error = format_exception(e)
|
|
545
557
|
self.update_status(Status.failed, error)
|
|
@@ -639,6 +651,7 @@ class Execution:
|
|
|
639
651
|
] # Peel off the schema from the asset table
|
|
640
652
|
asset_exe = self._model.find_association(asset_table_name, "Execution")
|
|
641
653
|
asset_exe_path = pb.schemas[asset_exe.schema.name].tables[asset_exe.name]
|
|
654
|
+
|
|
642
655
|
asset_exe_path.insert(
|
|
643
656
|
[
|
|
644
657
|
{
|
|
@@ -688,16 +701,26 @@ class Execution:
|
|
|
688
701
|
asset_name: str,
|
|
689
702
|
file_name: str,
|
|
690
703
|
asset_types: Optional[list[str] | str] = None,
|
|
704
|
+
copy_file=False,
|
|
691
705
|
**kwargs,
|
|
692
706
|
) -> AssetFilePath:
|
|
693
707
|
"""Return a pathlib Path to the directory in which to place files for the specified execution_asset type.
|
|
694
708
|
|
|
695
|
-
|
|
709
|
+
Given the name of an asset table, and a file name, register the file for upload, and return a path to that
|
|
710
|
+
file in the upload directory. In addition to the filename, additional asset metadata and file asset types may
|
|
711
|
+
be specified.
|
|
712
|
+
|
|
713
|
+
This routine has three modes, depending on if file_name refers to an existing file. If it doesn't, a path
|
|
714
|
+
to a new file with the specified name is returned. The caller can then open that file for writing.
|
|
715
|
+
|
|
716
|
+
If the provided filename refers to an existing file and the copy_file argument is False (the default), then the
|
|
717
|
+
returned path contains a symbolic link to that file. If the copy_file argument is True then the contents of
|
|
718
|
+
file_name are copied into the target directory.
|
|
696
719
|
|
|
697
720
|
Args:
|
|
698
721
|
asset_name: Name of the asset table into which the file is to be uploaded.
|
|
699
|
-
asset_types: Type of asset to be uploaded. Defaults to name of the asset.
|
|
700
722
|
file_name: Name of file to be uploaded.
|
|
723
|
+
asset_types: Type of asset to be uploaded. Defaults to name of the asset.
|
|
701
724
|
**kwargs: Any additional metadata values that may be part of the asset table.
|
|
702
725
|
|
|
703
726
|
Returns:
|
|
@@ -716,26 +739,33 @@ class Execution:
|
|
|
716
739
|
for t in asset_types:
|
|
717
740
|
self._ml_object.lookup_term(MLVocab.asset_type, t)
|
|
718
741
|
|
|
742
|
+
file_name = Path(file_name)
|
|
719
743
|
asset_path = asset_file_path(
|
|
720
744
|
self._working_dir,
|
|
721
745
|
self.execution_rid,
|
|
722
746
|
self._model.name_to_table(asset_name),
|
|
723
|
-
file_name,
|
|
747
|
+
file_name.name,
|
|
724
748
|
metadata=kwargs,
|
|
725
749
|
)
|
|
726
750
|
|
|
751
|
+
if file_name.exists():
|
|
752
|
+
if copy_file:
|
|
753
|
+
asset_path.write_bytes(file_name.read_bytes())
|
|
754
|
+
else:
|
|
755
|
+
asset_path.symlink_to(file_name)
|
|
756
|
+
|
|
727
757
|
# Persist the asset types into a file
|
|
728
758
|
with open(
|
|
729
759
|
asset_type_path(self._working_dir, self.execution_rid, asset_table),
|
|
730
760
|
"a",
|
|
731
761
|
encoding="utf-8",
|
|
732
762
|
) as f:
|
|
733
|
-
f.write(json.dumps({file_name: asset_types}) + "\n")
|
|
763
|
+
f.write(json.dumps({file_name.name: asset_types}) + "\n")
|
|
734
764
|
|
|
735
765
|
return AssetFilePath(
|
|
736
766
|
asset_path=asset_path,
|
|
737
767
|
asset_name=asset_name,
|
|
738
|
-
file_name=file_name,
|
|
768
|
+
file_name=file_name.name,
|
|
739
769
|
asset_metadata=kwargs,
|
|
740
770
|
asset_types=asset_types,
|
|
741
771
|
)
|
|
@@ -760,6 +790,7 @@ class Execution:
|
|
|
760
790
|
|
|
761
791
|
def execute(self) -> Execution:
|
|
762
792
|
"""Initiate an execution with provided configuration. Can be used in a context manager."""
|
|
793
|
+
self.execution_start()
|
|
763
794
|
return self
|
|
764
795
|
|
|
765
796
|
@validate_call
|
deriva_ml/upload.py
CHANGED
|
@@ -212,14 +212,15 @@ def asset_table_upload_spec(model: DerivaModel, asset_table: str | Table):
|
|
|
212
212
|
}
|
|
213
213
|
| {c: f"{{{c}}}" for c in metadata_columns},
|
|
214
214
|
"file_pattern": asset_path, # Sets schema, asset_table, file_name, file_ext
|
|
215
|
+
"asset_type": "file",
|
|
215
216
|
"target_table": [schema, asset_table.name],
|
|
216
217
|
"checksum_types": ["sha256", "md5"],
|
|
217
218
|
"hatrac_options": {"versioned_urls": True},
|
|
218
219
|
"hatrac_templates": {
|
|
219
|
-
"hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}",
|
|
220
|
+
"hatrac_uri": f"/hatrac/{asset_table.name}/{{md5}}.{{file_name}}.{{file_ext}}",
|
|
220
221
|
"content-disposition": "filename*=UTF-8''{file_name}.{file_ext}",
|
|
221
222
|
},
|
|
222
|
-
"record_query_template": "/entity/{target_table}/MD5={{md5}}&Filename={
|
|
223
|
+
"record_query_template": "/entity/{target_table}/MD5={{md5}}&Filename={file_name}.{file_ext}",
|
|
223
224
|
}
|
|
224
225
|
|
|
225
226
|
|
|
@@ -244,7 +245,7 @@ def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
|
|
|
244
245
|
"Length": "{file_size}",
|
|
245
246
|
"Filename": "{file_name}",
|
|
246
247
|
},
|
|
247
|
-
"asset_type": "
|
|
248
|
+
"asset_type": "file",
|
|
248
249
|
"target_table": ["{schema}", "{asset_table}"],
|
|
249
250
|
"file_pattern": asset_path_regex
|
|
250
251
|
+ "/"
|
|
@@ -252,10 +253,10 @@ def bulk_upload_configuration(model: DerivaModel) -> dict[str, Any]:
|
|
|
252
253
|
"checksum_types": ["sha256", "md5"],
|
|
253
254
|
"hatrac_options": {"versioned_urls": True},
|
|
254
255
|
"hatrac_templates": {
|
|
255
|
-
"hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}",
|
|
256
|
+
"hatrac_uri": "/hatrac/{asset_table}/{md5}.{file_name}.{file_ext}",
|
|
256
257
|
"content-disposition": "filename*=UTF-8''{file_name}.{file_ext}",
|
|
257
258
|
},
|
|
258
|
-
"record_query_template": "/entity/{target_table}/MD5={
|
|
259
|
+
"record_query_template": "/entity/{target_table}/MD5={md5}&Filename={file_name}.{file_ext}",
|
|
259
260
|
},
|
|
260
261
|
# {
|
|
261
262
|
# Upload the records into a table
|
|
@@ -448,7 +449,7 @@ def asset_type_path(prefix: Path | str, exec_rid: RID, asset_table: Table) -> Pa
|
|
|
448
449
|
asset_table: Table in which to place assets.
|
|
449
450
|
|
|
450
451
|
Returns:
|
|
451
|
-
Path to the file in which to place asset_type values for the named asset
|
|
452
|
+
Path to the file in which to place asset_type values for the named asset.
|
|
452
453
|
"""
|
|
453
454
|
path = (
|
|
454
455
|
execution_root(prefix, exec_rid=exec_rid)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deriva-ml
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.12.1
|
|
4
4
|
Summary: Utilities to simplify use of Deriva and Pandas to create reproducible ML pipelines
|
|
5
5
|
Author-email: ISRD <isrd-dev@isi.edu>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -28,3 +28,4 @@ The script release.sh will create a new release tag in GitHub. This script requ
|
|
|
28
28
|
GitHub CLI be installed.
|
|
29
29
|
|
|
30
30
|
See [https://cli.github.com](https://cli.github.com) for instructions on how to install and configure the CLI.
|
|
31
|
+
|
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
deriva_ml/__init__.py,sha256=r1Z9N5vtZkAET7emqhpAx2bf_xJUp5wHOc4_DIplsG8,1082
|
|
2
|
-
deriva_ml/database_model.py,sha256=
|
|
3
|
-
deriva_ml/dataset.py,sha256=
|
|
2
|
+
deriva_ml/database_model.py,sha256=lMbAEqn4n0m7h_JstMX_LX9gbvBIEydG3sRilPn3eLU,14885
|
|
3
|
+
deriva_ml/dataset.py,sha256=oBg4j8loAZA2ccP38fTryeWEolsZ1PStYUOGMlpjE0w,60592
|
|
4
4
|
deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
|
|
5
|
-
deriva_ml/dataset_bag.py,sha256=
|
|
5
|
+
deriva_ml/dataset_bag.py,sha256=yS8oYVshfFtRDyhGPRqtbvxjyd3ZFF29lrB783OP4vM,11849
|
|
6
6
|
deriva_ml/demo_catalog.py,sha256=9Qo3JD4bUIwnL3ngPctc2QBeWApvMR_5UyaK9ockTrY,11536
|
|
7
7
|
deriva_ml/deriva_definitions.py,sha256=2eSbTFQ-9rpctphN4PLo8WdtkzMfhfZr3vJeywt6xPM,8897
|
|
8
|
-
deriva_ml/deriva_ml_base.py,sha256=
|
|
9
|
-
deriva_ml/deriva_model.py,sha256=
|
|
10
|
-
deriva_ml/execution.py,sha256=
|
|
11
|
-
deriva_ml/execution_configuration.py,sha256=
|
|
8
|
+
deriva_ml/deriva_ml_base.py,sha256=rrImShp1RXvMuXVLft5GfTnxf_PfF1LONHgV1Ee_E9I,46517
|
|
9
|
+
deriva_ml/deriva_model.py,sha256=wytGCAHutiUaRfnRKr80Ks_P6ci0_wXRU3vq3lthfYU,13260
|
|
10
|
+
deriva_ml/execution.py,sha256=SggLMAfQevnkGyaixF6dRwn36qHO5s07wkLxQXmNCag,36020
|
|
11
|
+
deriva_ml/execution_configuration.py,sha256=XQeXzPz9Gh_AGa_iYW8zF95niwHed3ojv4gnibB0thA,4082
|
|
12
12
|
deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
|
|
13
13
|
deriva_ml/feature.py,sha256=07g0uSrhumdopJluWuWSRMrzagaikAOihqB09bzXBP4,5475
|
|
14
14
|
deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
|
|
15
15
|
deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
|
|
16
|
-
deriva_ml/upload.py,sha256=
|
|
16
|
+
deriva_ml/upload.py,sha256=Df-xyU8i0wEe-avRf3I_HMCPMatLU6ID1vZzfxCF_ko,16211
|
|
17
17
|
deriva_ml/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
deriva_ml/schema_setup/annotations.py,sha256=v0gTpmWYxRqsQ-bcnQzsr8WowGv2pi9pZUsO3WWnu1U,9528
|
|
19
19
|
deriva_ml/schema_setup/create_schema.py,sha256=hNMc-v5tferd0UjfdB6nBw7Rc_o-Mg6NkPqQGie9YOw,11700
|
|
20
20
|
deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
|
|
21
21
|
deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
|
|
22
|
-
deriva_ml-1.
|
|
23
|
-
deriva_ml-1.
|
|
24
|
-
deriva_ml-1.
|
|
25
|
-
deriva_ml-1.
|
|
26
|
-
deriva_ml-1.
|
|
27
|
-
deriva_ml-1.
|
|
22
|
+
deriva_ml-1.12.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
23
|
+
deriva_ml-1.12.1.dist-info/METADATA,sha256=HpaB7Rs3HCV_iFBzl9TTBrmI6BOvomv_FGuO6Rx7k8c,974
|
|
24
|
+
deriva_ml-1.12.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
25
|
+
deriva_ml-1.12.1.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
|
|
26
|
+
deriva_ml-1.12.1.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
|
|
27
|
+
deriva_ml-1.12.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|