deriva-ml 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deriva_ml/VERSION.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.8.2"
1
+ __version__ = "1.8.4"
deriva_ml/dataset.py CHANGED
@@ -6,6 +6,7 @@ accessible via a DerivaML class instance.
6
6
 
7
7
  """
8
8
 
9
+ from __future__ import annotations
9
10
  from bdbag.fetch.fetcher import fetch_single_file
10
11
  from bdbag import bdbag_api as bdb
11
12
  from collections import defaultdict
@@ -37,7 +38,7 @@ from pydantic import (
37
38
  import requests
38
39
 
39
40
  from tempfile import TemporaryDirectory, NamedTemporaryFile
40
- from typing import Any, Callable, Optional, Iterable, Iterator
41
+ from typing import Any, Callable, Optional, Iterable, Iterator, TYPE_CHECKING
41
42
 
42
43
  from deriva_ml import DatasetBag
43
44
  from .deriva_definitions import ML_SCHEMA, DerivaMLException, MLVocab, Status, RID
@@ -52,6 +53,9 @@ from .dataset_aux_classes import (
52
53
  DatasetSpec,
53
54
  )
54
55
 
56
+ if TYPE_CHECKING:
57
+ from .deriva_ml_base import DerivaML
58
+
55
59
 
56
60
  class Dataset:
57
61
  """
@@ -83,29 +87,32 @@ class Dataset:
83
87
  else:
84
88
  return not list(rid_info.datapath.entities().fetch())[0]["Deleted"]
85
89
 
86
- def _insert_dataset_version(
90
+ def _insert_dataset_versions(
87
91
  self,
88
- dataset_rid: RID,
89
- dataset_version: DatasetVersion,
92
+ dataset_list: list[DatasetSpec],
90
93
  description: Optional[str] = "",
91
94
  execution_rid: Optional[RID] = None,
92
95
  ) -> RID:
93
96
  schema_path = self._model.catalog.getPathBuilder().schemas[self._ml_schema]
94
- version_path = schema_path.tables["Dataset_Version"]
95
- version_rid = version_path.insert(
96
- [
97
- {
98
- "Dataset": dataset_rid,
99
- "Version": str(dataset_version),
100
- "Description": description,
101
- "Execution": execution_rid,
102
- }
103
- ]
104
- )[0]["RID"]
105
- schema_path.tables["Dataset"].update(
106
- [{"RID": dataset_rid, "Version": version_rid}]
107
- )
108
- return version_rid
97
+
98
+ # Construct version records for insert
99
+ version_records = [
100
+ {
101
+ "Dataset": dataset.rid,
102
+ "Version": str(dataset.version),
103
+ "Description": description,
104
+ "Execution": execution_rid,
105
+ }
106
+ for dataset in dataset_list
107
+ ]
108
+
109
+ # Insert version records and construct entities for updating the dataset version column.
110
+ version_rids = [
111
+ {"Version": v["RID"], "RID": v["Dataset"]}
112
+ for v in schema_path.tables["Dataset_Version"].insert(version_records)
113
+ ]
114
+ schema_path.tables["Dataset"].update(version_rids)
115
+ return version_rids
109
116
 
110
117
  def _bootstrap_versions(self):
111
118
  datasets = [ds["RID"] for ds in self.find_datasets()]
@@ -237,16 +244,20 @@ class Dataset:
237
244
  Raises:
238
245
  DerivaMLException: if provided RID is not to a dataset_table.
239
246
  """
240
- for dataset in self._build_dataset_graph(dataset_rid=dataset_rid):
241
- version = self.dataset_version(dataset)
242
- new_version = version.increment_version(component)
243
- self._insert_dataset_version(
244
- dataset,
245
- new_version,
246
- description=description,
247
- execution_rid=execution_rid,
247
+
248
+ # Find all of the datasets that are reachable from this dataset and determine their new version numbers.
249
+ related_datasets = list(self._build_dataset_graph(dataset_rid=dataset_rid))
250
+ version_update_list = [
251
+ DatasetSpec(
252
+ rid=ds_rid,
253
+ version=self.dataset_version(ds_rid).increment_version(component),
248
254
  )
249
- return self.dataset_version(dataset_rid)
255
+ for ds_rid in related_datasets
256
+ ]
257
+ updated_versions = self._insert_dataset_versions(
258
+ version_update_list, description=description, execution_rid=execution_rid
259
+ )
260
+ return [d.version for d in version_update_list if d.rid == dataset_rid][0]
250
261
 
251
262
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
252
263
  def create_dataset(
@@ -323,9 +334,8 @@ class Dataset:
323
334
  pb.schemas[self._ml_schema].Dataset_Execution.insert(
324
335
  [{"Dataset": dataset_rid, "Execution": execution_rid}]
325
336
  )
326
- self._insert_dataset_version(
327
- dataset_rid,
328
- dataset_version=version,
337
+ self._insert_dataset_versions(
338
+ [DatasetSpec(rid=dataset_rid, version=version)],
329
339
  execution_rid=execution_rid,
330
340
  description="Initial dataset creation.",
331
341
  )
@@ -455,7 +465,7 @@ class Dataset:
455
465
  dataset_rid: param recurse: If this is a nested dataset_table, list the members of the contained datasets
456
466
  dataset_rid: RID:
457
467
  recurse: (Default value = False)
458
- limit: If provided, the maxiumum number of members to return for each element type.
468
+ limit: If provided, the maximum number of members to return for each element type.
459
469
 
460
470
  Returns:
461
471
  Dictionary of entities associated with a specific dataset_table. Key is the table from which the elements
@@ -697,11 +707,25 @@ class Dataset:
697
707
  list of RIDs of nested datasets.
698
708
 
699
709
  """
700
- children = [d["RID"] for d in self.list_dataset_members(dataset_rid)["Dataset"]]
701
- if recurse:
702
- for child in children.copy():
703
- children.extend(self.list_dataset_children(child, recurse=recurse))
704
- return children
710
+ dataset_dataset_path = (
711
+ self._model.catalog.getPathBuilder()
712
+ .schemas[self._ml_schema]
713
+ .tables["Dataset_Dataset"]
714
+ )
715
+ nested_datasets = list(dataset_dataset_path.entities().fetch())
716
+
717
+ def find_children(rid: RID):
718
+ children = [
719
+ child["Nested_Dataset"]
720
+ for child in nested_datasets
721
+ if child["Dataset"] == rid
722
+ ]
723
+ if recurse:
724
+ for child in children.copy():
725
+ children.extend(find_children(child))
726
+ return children
727
+
728
+ return find_children(dataset_rid)
705
729
 
706
730
  def _vocabulary_specification(
707
731
  self, writer: Callable[[str, str, Table], list[dict[str, Any]]]
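
The rewrite above fetches the Dataset_Dataset association table once and walks it in memory instead of issuing one query per child. A standalone sketch of the same traversal over plain dictionaries (illustrative data only):

    # Each row links a parent dataset to one nested dataset.
    nested = [
        {"Dataset": "D1", "Nested_Dataset": "D2"},
        {"Dataset": "D2", "Nested_Dataset": "D3"},
    ]

    def find_children(rid, recurse=True):
        children = [row["Nested_Dataset"] for row in nested if row["Dataset"] == rid]
        if recurse:
            for child in list(children):
                children.extend(find_children(child))
        return children

    assert find_children("D1") == ["D2", "D3"]
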
@@ -727,20 +751,19 @@ class Dataset:
727
751
  ]
728
752
 
729
753
  def _table_paths(
730
- self, dataset: DatasetSpec = None
731
- ) -> Iterator[tuple[list[str], list[str], list[Table]]]:
754
+ self, dataset: DatasetSpec = None, snapshot_catalog: Optional[DerivaML] = None
755
+ ) -> Iterator[tuple[str, str, Table]]:
732
756
 
733
- dataset_dataset = self._model.schemas[self._ml_schema].tables["Dataset_Dataset"]
734
-
735
- paths = self._collect_paths(dataset and dataset.rid)
757
+ paths = self._collect_paths(dataset and dataset.rid, snapshot_catalog)
736
758
 
737
759
  def source_path(path: tuple[Table, ...]):
760
+ """Convert a tuple representing a path into a source path component with FK linkage"""
738
761
  path = list(path)
739
762
  p = [f"{self._model.ml_schema}:Dataset/RID={{Dataset_RID}}"]
740
763
  for table in path[1:]:
741
- if table == dataset_dataset:
764
+ if table.name == "Dataset_Dataset":
742
765
  p.append("(RID)=(deriva-ml:Dataset_Dataset:Dataset)")
743
- elif table == self.dataset_table:
766
+ elif table.name == "Dataset":
744
767
  p.append("(Nested_Dataset)=(deriva-ml:Dataset:RID)")
745
768
  elif table.name == "Dataset_Version":
746
769
  p.append(f"(RID)=({self._model.ml_schema}:Dataset_Version:Dataset)")
@@ -751,63 +774,76 @@ class Dataset:
751
774
  src_paths = ["/".join(source_path(p)) for p in paths]
752
775
  dest_paths = ["/".join([t.name for t in p]) for p in paths]
753
776
  target_tables = [p[-1] for p in paths]
754
-
755
777
  return zip(src_paths, dest_paths, target_tables)
756
778
 
757
779
  def _collect_paths(
758
780
  self,
759
781
  dataset_rid: Optional[RID] = None,
782
+ snapshot_catalog: Optional[DerivaML] = None,
760
783
  dataset_nesting_depth: Optional[int] = None,
761
784
  ) -> set[tuple[Table, ...]]:
762
785
 
763
- dataset_nesting_depth = (
764
- self._dataset_nesting_depth()
765
- if dataset_nesting_depth is None
766
- else dataset_nesting_depth
767
- )
768
- dataset_dataset = self._model.schemas[self._ml_schema].tables["Dataset_Dataset"]
769
-
770
- # Figure out which paths we don't need to query for this dataset. If no dataset is provided, use them all.
771
- dataset_elements = (
772
- [
773
- self._model.name_to_table(e)
774
- for e, m in self.list_dataset_members(
786
+ snapshot_catalog = snapshot_catalog or self
787
+ dataset_table = snapshot_catalog._model.schemas[self._ml_schema].tables[
788
+ "Dataset"
789
+ ]
790
+ dataset_dataset = snapshot_catalog._model.schemas[self._ml_schema].tables[
791
+ "Dataset_Dataset"
792
+ ]
793
+ dataset_associations = [
794
+ a
795
+ for a in self.dataset_table.find_associations()
796
+ if a.table.schema.name != self._ml_schema
797
+ or a.table.name == "Dataset_Dataset"
798
+ ]
799
+ if dataset_rid:
800
+ # Get a list of the members of the dataset so we can figure out which tables to query.
801
+ dataset_elements = [
802
+ snapshot_catalog._model.name_to_table(e)
803
+ for e, m in snapshot_catalog.list_dataset_members(
775
804
  dataset_rid=dataset_rid, limit=1
776
805
  ).items()
777
806
  if m
778
807
  ]
779
- if dataset_rid
780
- else self.list_dataset_element_types()
781
- )
782
-
783
- dataset_associations = [a.table for a in self.dataset_table.find_associations()]
784
- included_associations = [
785
- a.table
786
- for a in self.dataset_table.find_associations()
787
- if a.other_fkeys.pop().pk_table in dataset_elements
788
- ]
808
+ included_associations = [
809
+ a.table
810
+ for a in dataset_table.find_associations()
811
+ if a.other_fkeys.pop().pk_table in dataset_elements
812
+ ]
813
+ else:
814
+ included_associations = dataset_associations
789
815
  # Get the paths through the schema and filter out all of dataset paths not used by this dataset.
790
816
  paths = {
791
817
  tuple(p)
792
- for p in self._model._schema_to_paths()
818
+ for p in snapshot_catalog._model._schema_to_paths()
793
819
  if (len(p) == 1)
794
- or (p[1] not in dataset_associations)
795
- or (p[1] in included_associations)
820
+ or (p[1] not in dataset_associations) # Tables in the domain schema
821
+ or (
822
+ p[1] in included_associations
823
+ ) # Tables that include members of the dataset
796
824
  }
797
825
  # Now get paths for nested datasets
798
826
  nested_paths = set()
799
827
  if dataset_rid:
800
- for c in self.list_dataset_children(dataset_rid=dataset_rid):
801
- nested_paths |= self._collect_paths(c)
828
+ for c in snapshot_catalog.list_dataset_children(dataset_rid=dataset_rid):
829
+ nested_paths |= self._collect_paths(
830
+ c, snapshot_catalog=snapshot_catalog
831
+ )
802
832
  else:
833
+ # Initialize nesting depth if not already provided.
834
+ dataset_nesting_depth = (
835
+ self._dataset_nesting_depth()
836
+ if dataset_nesting_depth is None
837
+ else dataset_nesting_depth
838
+ )
803
839
  if dataset_nesting_depth:
804
840
  nested_paths = self._collect_paths(
805
841
  dataset_nesting_depth=dataset_nesting_depth - 1
806
842
  )
807
843
  if nested_paths:
808
844
  paths |= {
809
- tuple([self.dataset_table]),
810
- (self.dataset_table, dataset_dataset),
845
+ tuple([dataset_table]),
846
+ (dataset_table, dataset_dataset),
811
847
  }
812
848
  paths |= {(self.dataset_table, dataset_dataset) + p for p in nested_paths}
813
849
  return paths
@@ -863,6 +899,7 @@ class Dataset:
863
899
  self,
864
900
  writer: Callable[[str, str, Table], list[dict[str, Any]]],
865
901
  dataset: DatasetSpec,
902
+ snapshot_catalog: Optional[DerivaML] = None,
866
903
  ) -> list[dict[str, Any]]:
867
904
  """Output a download/export specification for a dataset_table. Each element of the dataset_table will be placed in its own dir
868
905
  The top level data directory of the resulting BDBag will have one subdirectory per element type. The subdirectory
@@ -902,21 +939,24 @@ class Dataset:
902
939
  A dataset_table specification.
903
940
  """
904
941
  element_spec = []
905
- for path in self._table_paths(dataset=dataset):
942
+ for path in self._table_paths(
943
+ dataset=dataset, snapshot_catalog=snapshot_catalog
944
+ ):
906
945
  element_spec.extend(writer(*path))
907
946
  return self._vocabulary_specification(writer) + element_spec
908
947
 
909
- @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
910
- def download_dataset_bag(
948
+ def _download_dataset_bag(
911
949
  self,
912
950
  dataset: DatasetSpec,
913
951
  execution_rid: Optional[RID] = None,
952
+ snapshot_catalog: Optional[DerivaML] = None,
914
953
  ) -> DatasetBag:
915
954
  """Download a dataset onto the local file system. Create a MINID for the dataset if one doesn't already exist.
916
955
 
917
956
  Args:
918
957
  dataset: Specification of the dataset to be downloaded.
919
958
  execution_rid: Execution RID for the dataset.
959
+ snapshot_catalog: Snapshot catalog for the dataset version if specified.
920
960
 
921
961
  Returns:
922
962
  Tuple consisting of the path to the dataset, the RID of the dataset that was downloaded and the MINID
@@ -927,16 +967,17 @@ class Dataset:
927
967
  and self._model.catalog.resolve_rid(execution_rid).table.name != "Execution"
928
968
  ):
929
969
  raise DerivaMLException(f"RID {execution_rid} is not an execution")
930
- minid = self.get_dataset_minid(dataset)
970
+ minid = self._get_dataset_minid(dataset, snapshot_catalog=snapshot_catalog)
931
971
 
932
972
  bag_path = (
933
973
  self._materialize_dataset_bag(minid, execution_rid=execution_rid)
934
974
  if dataset.materialize
935
- else self._download_dataset_bag(minid)
975
+ else self._download_dataset_minid(minid)
936
976
  )
937
977
  return DatabaseModel(minid, bag_path).get_dataset()
938
978
 
939
979
  def _version_snapshot(self, dataset: DatasetSpec) -> str:
980
+ """Return a catalog with snapshot for the specified dataset version"""
940
981
  version_record = [
941
982
  h
942
983
  for h in self.dataset_history(dataset_rid=dataset.rid)
@@ -944,13 +985,17 @@ class Dataset:
944
985
  ][0]
945
986
  return f"{self._model.catalog.catalog_id}@{iso_to_snap(version_record.timestamp.isoformat())}"
946
987
 
947
- def _create_dataset_minid(self, dataset: DatasetSpec) -> str:
988
+ def _create_dataset_minid(
989
+ self, dataset: DatasetSpec, snapshot_catalog: Optional[DerivaML] = None
990
+ ) -> str:
948
991
  with TemporaryDirectory() as tmp_dir:
949
992
  # Generate a download specification file for the current catalog schema. By default, this spec
950
993
  # will generate a minid and place the bag into S3 storage.
951
994
  spec_file = f"{tmp_dir}/download_spec.json"
952
995
  with open(spec_file, "w", encoding="utf-8") as ds:
953
- json.dump(self._generate_dataset_download_spec(dataset), ds)
996
+ json.dump(
997
+ self._generate_dataset_download_spec(dataset, snapshot_catalog), ds
998
+ )
954
999
  try:
955
1000
  self._logger.info(
956
1001
  f"Downloading dataset minid for catalog: {dataset.rid}@{str(dataset.version)}"
@@ -987,14 +1032,17 @@ class Dataset:
987
1032
  version_path.update([{"RID": version_rid, "Minid": minid_page_url}])
988
1033
  return minid_page_url
989
1034
 
990
- @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
991
- def get_dataset_minid(
992
- self, dataset: DatasetSpec, create: bool = True
1035
+ def _get_dataset_minid(
1036
+ self,
1037
+ dataset: DatasetSpec,
1038
+ snapshot_catalog: Optional[DerivaML] = None,
1039
+ create: bool = True,
993
1040
  ) -> DatasetMinid:
994
1041
  """Return a MINID to the specified dataset. If no version is specified, use the latest.
995
1042
 
996
1043
  Args:
997
1044
  dataset: Specification of the dataset.
1045
+ snapshot_catalog: Snapshot catalog for the dataset version if specified.
998
1046
  create: Create a new MINID if one doesn't already exist.
999
1047
 
1000
1048
  Returns:
@@ -1025,12 +1073,12 @@ class Dataset:
1025
1073
  f"Minid for dataset {dataset.rid} doesn't exist"
1026
1074
  )
1027
1075
  self._logger.info("Creating new MINID for dataset %s", dataset.rid)
1028
- minid_url = self._create_dataset_minid(dataset)
1076
+ minid_url = self._create_dataset_minid(dataset, snapshot_catalog)
1029
1077
  # If provided a MINID, use the MINID metadata to get the checksum and download the bag.
1030
1078
  r = requests.get(minid_url, headers={"accept": "application/json"})
1031
1079
  return DatasetMinid(dataset_version=dataset.version, **r.json())
1032
1080
 
1033
- def _download_dataset_bag(self, minid: DatasetMinid) -> Path:
1081
+ def _download_dataset_minid(self, minid: DatasetMinid) -> Path:
1034
1082
  """Given a RID to a dataset_table, or a MINID to an existing bag, download the bag file, extract it and validate
1035
1083
  that all the metadata is correct
1036
1084
 
@@ -1097,7 +1145,7 @@ class Dataset:
1097
1145
  return True
1098
1146
 
1099
1147
  # request metadata
1100
- bag_path = self._download_dataset_bag(minid)
1148
+ bag_path = self._download_dataset_minid(minid)
1101
1149
  bag_dir = bag_path.parent
1102
1150
  validated_check = bag_dir / "validated_check.txt"
1103
1151
 
@@ -1112,7 +1160,9 @@ class Dataset:
1112
1160
  return Path(bag_path)
1113
1161
 
1114
1162
  def _export_outputs(
1115
- self, dataset: Optional[DatasetSpec] = None
1163
+ self,
1164
+ dataset: Optional[DatasetSpec] = None,
1165
+ snapshot_catalog: Optional[DerivaML] = None,
1116
1166
  ) -> list[dict[str, Any]]:
1117
1167
  """Return and output specification for the datasets in the provided model
1118
1168
 
@@ -1150,9 +1200,13 @@ class Dataset:
1150
1200
  "source": {"api": "schema", "skip_root_path": True},
1151
1201
  "destination": {"type": "json", "name": "schema"},
1152
1202
  },
1153
- ] + self._dataset_specification(writer, dataset)
1203
+ ] + self._dataset_specification(
1204
+ writer, dataset, snapshot_catalog=snapshot_catalog
1205
+ )
1154
1206
 
1155
- def _processor_params(self, dataset: DatasetSpec) -> list[dict[str, Any]]:
1207
+ def _processor_params(
1208
+ self, dataset: DatasetSpec, snapshot_catalog: Optional[DerivaML] = None
1209
+ ) -> list[dict[str, Any]]:
1156
1210
  """
1157
1211
  Returns:
1158
1212
  a download specification for the datasets in the provided model.
@@ -1178,7 +1232,7 @@ class Dataset:
1178
1232
  "processor": "json",
1179
1233
  "processor_params": {"query_path": "/schema", "output_path": "schema"},
1180
1234
  }
1181
- ] + self._dataset_specification(writer, dataset)
1235
+ ] + self._dataset_specification(writer, dataset, snapshot_catalog)
1182
1236
 
1183
1237
  @staticmethod
1184
1238
  def _download_dataset_element(
@@ -1257,7 +1311,9 @@ class Dataset:
1257
1311
  )
1258
1312
  return exports
1259
1313
 
1260
- def _generate_dataset_download_spec(self, dataset: DatasetSpec) -> dict[str, Any]:
1314
+ def _generate_dataset_download_spec(
1315
+ self, dataset: DatasetSpec, snapshot_catalog: Optional[DerivaML]
1316
+ ) -> dict[str, Any]:
1261
1317
  """
1262
1318
 
1263
1319
  Returns:
@@ -1315,7 +1371,7 @@ class Dataset:
1315
1371
  },
1316
1372
  },
1317
1373
  ]
1318
- + self._processor_params(dataset),
1374
+ + self._processor_params(dataset, snapshot_catalog),
1319
1375
  },
1320
1376
  }
1321
1377
 
deriva_ml/deriva_definitions.py CHANGED
@@ -139,7 +139,6 @@ class FileSpec(BaseModel):
139
139
  if url_parts.scheme == "tag":
140
140
  return v
141
141
  elif not url_parts.scheme:
142
- print(v)
143
142
  return f'tag://{gethostname()},{date.today()}:file://{v}'
144
143
  else:
145
144
  raise ValidationError("url is not a file URL")
deriva_ml/deriva_ml_base.py CHANGED
@@ -15,8 +15,11 @@ import logging
15
15
  from datetime import datetime
16
16
  import hashlib
17
17
  from itertools import chain
18
+ import inspect
18
19
  from pathlib import Path
19
20
  import requests
21
+ from setuptools_git_versioning import get_latest_file_commit
22
+ import subprocess
20
23
  from typing import Optional, Any, Iterable, TYPE_CHECKING
21
24
  from deriva.core import (
22
25
  ErmrestCatalog,
@@ -35,6 +38,8 @@ from pydantic import validate_call, ConfigDict
35
38
  from .execution_configuration import ExecutionConfiguration, Workflow
36
39
  from .feature import Feature, FeatureRecord
37
40
  from .dataset import Dataset
41
+ from .dataset_aux_classes import DatasetSpec
42
+ from .dataset_bag import DatasetBag
38
43
  from .deriva_model import DerivaModel
39
44
  from .upload import (
40
45
  table_path,
@@ -56,6 +61,18 @@ from .deriva_definitions import (
56
61
  FileSpec,
57
62
  )
58
63
 
64
+ try:
65
+ from icecream import ic
66
+ except ImportError: # Graceful fallback if IceCream isn't installed.
67
+ ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
68
+
69
+
70
+ try:
71
+ from IPython import get_ipython
72
+ except ImportError: # Graceful fallback if IPython isn't installed.
73
+ get_ipython = lambda: None
74
+
75
+
59
76
  if TYPE_CHECKING:
60
77
  from .execution import Execution
61
78
 
@@ -132,6 +149,17 @@ class DerivaML(Dataset):
132
149
  self.version = model_version
133
150
  self.configuration = None
134
151
  self._execution: Optional[Execution] = None
152
+ self._notebook = None
153
+ try:
154
+ from IPython import get_ipython
155
+
156
+ ipython = get_ipython()
157
+ # Check if running in Jupyter's ZMQ kernel (used by notebooks)
158
+ if ipython is not None and "IPKernelApp" in ipython.config:
159
+ self._notebook = Path(ipython.user_ns.get("__session__"))
161
+ except (ImportError, AttributeError):
162
+ pass
135
163
 
136
164
  self.domain_schema = self.model.domain_schema
137
165
  self.project_name = project_name or self.domain_schema
@@ -705,6 +733,28 @@ class DerivaML(Dataset):
705
733
  for v in pb.schemas[table.schema.name].tables[table.name].entities().fetch()
706
734
  ]
707
735
 
736
+ @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
737
+ def download_dataset_bag(
738
+ self,
739
+ dataset: DatasetSpec,
740
+ execution_rid: Optional[RID] = None,
741
+ ) -> DatasetBag:
742
+ """Download a dataset onto the local file system. Create a MINID for the dataset if one doesn't already exist.
743
+
744
+ Args:
745
+ dataset: Specification of the dataset to be downloaded.
746
+ execution_rid: Execution RID for the dataset.
747
+
748
+ Returns:
749
+ Tuple consisting of the path to the dataset, the RID of the dataset that was downloaded and the MINID
750
+ for the dataset.
751
+ """
752
+ return self._download_dataset_bag(
753
+ dataset=dataset,
754
+ execution_rid=execution_rid,
755
+ snapshot_catalog=DerivaML(self.host_name, self._version_snapshot(dataset)),
756
+ )
757
+
708
758
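
A hedged usage sketch of this new public wrapper (host name, catalog id, and RID are hypothetical); the wrapper resolves the requested version to a catalog snapshot internally before delegating to _download_dataset_bag.

    from deriva_ml import DerivaML, DatasetSpec

    ml_instance = DerivaML("example.derivacloud.org", "1")   # hypothetical host/catalog
    spec = DatasetSpec(rid="1-ABC1", version=ml_instance.dataset_version("1-ABC1"))
    bag = ml_instance.download_dataset_bag(spec)              # returns a DatasetBag
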
  @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
709
759
  def download_asset(self, asset_rid: RID, dest_dir: Path) -> Path:
710
760
  """Download an asset from a URL and place it in a local directory.
@@ -808,8 +858,10 @@ class DerivaML(Dataset):
808
858
  Iterable of the RIDs of the files that were added.
809
859
  """
810
860
  defined_types = self.list_vocabulary_terms(MLVocab.file_type)
811
- if execution_rid and self.resolve_rid(execution_rid).table.name != 'Execution':
812
- raise DerivaMLException(f'RID {execution_rid} is not for an execution table.')
861
+ if execution_rid and self.resolve_rid(execution_rid).table.name != "Execution":
862
+ raise DerivaMLException(
863
+ f"RID {execution_rid} is not for an execution table."
864
+ )
813
865
 
814
866
  def check_file_type(dtype: str) -> bool:
815
867
  for term in defined_types:
@@ -862,18 +914,11 @@ class DerivaML(Dataset):
862
914
  self, file_types: Optional[list[str]] = None
863
915
  ) -> list[dict[str, Any]]:
864
916
  """Return the contents of the file table. Denormalized file types into the file record."""
865
- atable = next(
866
- self._model.schemas[self._ml_schema]
867
- .tables[MLVocab.dataset_type]
868
- .find_associations()
869
- ).name
870
917
  ml_path = self.pathBuilder.schemas[self._ml_schema]
871
- atable_path = ml_path.tables[atable]
872
918
  file_path = ml_path.File
873
919
  type_path = ml_path.File_File_Type
874
920
 
875
921
  # Get a list of all the dataset_type values associated with this dataset_table.
876
- files = []
877
922
  path = file_path.link(type_path)
878
923
  path = path.attributes(
879
924
  path.File.RID,
@@ -885,10 +930,12 @@ class DerivaML(Dataset):
885
930
  )
886
931
  file_map = {}
887
932
  for f in path.fetch():
888
- file_map.setdefault(f['RID'], f | {'File_Types': []})['File_Types'].append(f['File_Type'])
933
+ file_map.setdefault(f["RID"], f | {"File_Types": []})["File_Types"].append(
934
+ f["File_Type"]
935
+ )
889
936
 
890
937
  # Now get rid of the File_Type key and return the result
891
- return [ (f, f.pop('File_Type'))[0] for f in file_map.values()]
938
+ return [(f, f.pop("File_Type"))[0] for f in file_map.values()]
892
939
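
The rewritten list_files groups one fetched row per (file, file type) pair into a single record per file. A standalone sketch of the same setdefault idiom over illustrative rows:

    rows = [
        {"RID": "F1", "File_Type": "CSV"},
        {"RID": "F1", "File_Type": "Table"},
        {"RID": "F2", "File_Type": "Image"},
    ]
    file_map = {}
    for f in rows:
        # Accumulate all File_Type values for a given file RID.
        file_map.setdefault(f["RID"], f | {"File_Types": []})["File_Types"].append(f["File_Type"])
    files = [(f, f.pop("File_Type"))[0] for f in file_map.values()]
    # files[0] == {"RID": "F1", "File_Types": ["CSV", "Table"]}
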
 
893
940
  def list_workflows(self) -> list[Workflow]:
894
941
  """Return a list of all the workflows in the catalog."""
@@ -901,6 +948,7 @@ class DerivaML(Dataset):
901
948
  version=w["Version"],
902
949
  description=w["Description"],
903
950
  rid=w["RID"],
951
+ checksum=w["Checksum"],
904
952
  )
905
953
  for w in workflow_path.entities().fetch()
906
954
  ]
@@ -917,33 +965,18 @@ class DerivaML(Dataset):
917
965
  """
918
966
 
919
967
  # Check to make sure that the workflow is not already in the table. If it's not, add it.
920
- def get_checksum(url) -> str:
921
- """Get the checksum of a file from a URL."""
922
- try:
923
- response = requests.get(url)
924
- response.raise_for_status()
925
- except Exception:
926
- raise DerivaMLException(f"Invalid URL: {url}")
927
- else:
928
- sha256_hash = hashlib.sha256()
929
- sha256_hash.update(response.content)
930
- checksum = "SHA-256: " + sha256_hash.hexdigest()
931
- return checksum
968
+
969
+ if workflow_rid := self.lookup_workflow(workflow.url):
970
+ return workflow_rid
932
971
 
933
972
  ml_schema_path = self.pathBuilder.schemas[self.ml_schema]
934
973
  try:
935
- url_column = ml_schema_path.Workflow.URL
936
- workflow_record = list(
937
- ml_schema_path.Workflow.filter(url_column == workflow.url).entities()
938
- )[0]
939
- workflow_rid = workflow_record["RID"]
940
- except IndexError:
941
974
  # Record doesn't exist already
942
975
  workflow_record = {
943
976
  "URL": workflow.url,
944
977
  "Name": workflow.name,
945
978
  "Description": workflow.description,
946
- "Checksum": get_checksum(workflow.url),
979
+ "Checksum": workflow.checksum,
947
980
  "Version": workflow.version,
948
981
  MLVocab.workflow_type: self.lookup_term(
949
982
  MLVocab.workflow_type, workflow.workflow_type
@@ -955,6 +988,125 @@ class DerivaML(Dataset):
955
988
  raise DerivaMLException(f"Failed to insert workflow. Error: {error}")
956
989
  return workflow_rid
957
990
 
991
+ def lookup_workflow(self, url: str) -> Optional[RID]:
992
+ workflow_path = self.pathBuilder.schemas[self.ml_schema].Workflow
993
+ try:
994
+ url_column = workflow_path.URL
995
+ return list(workflow_path.filter(url_column == url).entities())[0]["RID"]
996
+ except IndexError:
997
+ return None
998
+
999
+ def create_workflow(
1000
+ self, name: str, workflow_type: str, description: str = "", create: bool = True
1001
+ ) -> RID:
1002
+ """Identify current executing program and return a workflow RID for it
1003
+
1004
+ Determane the notebook of script that is currently being executed. Assume that this is
1005
+ being executed from a cloned GitHub repository. Determine the remote repository name for
1006
+ this object. Then either retrieve an existing workflow for this executable of create
1007
+ a new one.
1008
+
1009
+ Args:
1010
+ name: The name of the workflow.
1011
+ workflow_type: The type of the workflow.
1012
+ description: The description of the workflow.
1013
+ create: Whether or not to create a new workflow.
1014
+ """
1015
+ # Make sure type is correct.
1016
+ self.lookup_term(MLVocab.workflow_type, workflow_type)
1017
+ filename, github_url, is_dirty = self._github_url()
1018
+
1019
+ if is_dirty:
1020
+ self._logger.warning(
1021
+ f"File {filename} has been modified since last commit. Consider commiting before executing"
1022
+ )
1023
+
1024
+ sha256_hash = hashlib.sha256()
1025
+ if self._notebook:
1026
+ # If you are in a notebook, strip out the outputs before computing the checksum.
1027
+ result = subprocess.run(
1028
+ ["nbstripout", "-t", filename],
1029
+ capture_output=True,
1030
+ text=False,
1031
+ check=True,
1032
+ )
1033
+ sha256_hash.update(result.stdout)
1034
+ else:
1035
+ with open(filename, "rb") as f:
1036
+ sha256_hash.update(f.read())
1037
+ checksum = "SHA-256:" + sha256_hash.hexdigest()
1038
+
1039
+ workflow = Workflow(
1040
+ name=name,
1041
+ url=github_url,
1042
+ checksum=checksum,
1043
+ description=description,
1044
+ workflow_type=workflow_type,
1045
+ )
1046
+ return self.add_workflow(workflow) if create else None
1047
+
1048
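
A hedged usage sketch of create_workflow, assuming ml_instance is a connected DerivaML instance and that the workflow_type value already exists as a Workflow_Type vocabulary term:

    workflow_rid = ml_instance.create_workflow(
        name="train-model",                # hypothetical workflow name
        workflow_type="Python script",     # assumed existing Workflow_Type vocabulary term
        description="Nightly training run",
    )
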
+ def _github_url(self) -> tuple[str, str, bool]:
1049
+ """Return a GitHUB URL for the latest commit of the script from which this routine is called.
1050
+
1051
+ This routine is used to be called from a script or notebook (e.g. python -m file). It assumes that
1052
+ the file is in a gitHUB repository and commited. It returns a URL to the last commited version of this
1053
+ file in GitHUB.
1054
+
1055
+ Returns: A tuple with the filename, gethub_url and a boolaen to indicated if uncommited changes
1056
+ have been made to the file.
1057
+
1058
+ """
1059
+
1060
+ # Get the name of the script that is calling this function.
1061
+ if self._notebook:
1062
+ # Try to get the __session__ variable from the user namespace.
1063
+ filename = Path("").absolute().parent / self._notebook
1064
+ else:
1065
+ stack = inspect.stack()
1066
+ if len(stack) > 1:
1067
+ filename = Path(
1068
+ stack[2].filename
1069
+ ) # Get the caller's filename, which is two up the stack from here.
1070
+ else:
1071
+ raise DerivaMLException(
1072
+ f"Looking for caller failed"
1073
+ ) # Stack is too shallow
1074
+
1075
+ # Get repo URL from local github repo.
1076
+ try:
1077
+ result = subprocess.run(
1078
+ ["git", "remote", "get-url", "origin"], capture_output=True, text=True
1079
+ )
1080
+ github_url = result.stdout.strip().removesuffix(".git")
1081
+ except subprocess.CalledProcessError:
1082
+ raise DerivaMLException(f"No GIT remote found")
1083
+
1084
+ # Find the root directory for the repository
1085
+ repo_root = filename
1086
+ while repo_root != repo_root.root:
1087
+ if (repo_root / ".git").exists():
1088
+ break
1089
+ else:
1090
+ repo_root = repo_root.parent
1091
+
1092
+ # Now check to see if file has been modified since the last commit.
1093
+ try:
1094
+ result = subprocess.run(
1095
+ ["git", "status", "--porcelain"],
1096
+ capture_output=True,
1097
+ text=True,
1098
+ check=True,
1099
+ )
1100
+ is_dirty = bool(
1101
+ " M " in result.stdout.strip()
1102
+ ) # Returns True if output indicates a modified file
1103
+ except subprocess.CalledProcessError:
1104
+ is_dirty = False # If Git command fails, assume no changes
1105
+
1106
+ sha = get_latest_file_commit(filename)
1107
+ url = f"{github_url}/blob/{sha}/{filename.relative_to(repo_root)}"
1108
+ return filename, url, is_dirty
1109
+
958
1110
  # @validate_call
959
1111
  def create_execution(self, configuration: ExecutionConfiguration) -> "Execution":
960
1112
  """Create an execution object
deriva_ml/deriva_ml_execute.py ADDED
@@ -0,0 +1,104 @@
1
+ from sympy import cxxcode
2
+
3
+ from deriva_ml import DerivaML, execution_configuration
4
+
5
+ def execute(host, catalog, script):
6
+ workflow_rid = foobar
7
+ execution_configuration = cxxcode(
8
+
9
+ )
10
+ ml_instance = DerivaML()
11
+ ml_instance.create_execution(configuration)
12
+ script
13
+
14
+
15
+ from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec, RID, DerivaMLException
16
+ import os
17
+ import sys
18
+ import json
19
+ import traceback
20
+ import argparse
21
+ import requests
22
+ from requests.exceptions import HTTPError, ConnectionError
23
+ from deriva.transfer import GenericDownloader
24
+ from deriva.transfer.download import DerivaDownloadError, DerivaDownloadConfigurationError, \
25
+ DerivaDownloadAuthenticationError, DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, \
26
+ DerivaDownloadBaggingError
27
+ from deriva.core import BaseCLI, KeyValuePairArgs, format_credential, format_exception, urlparse
28
+
29
+
30
+ class DerivaMLExecCLI(BaseCLI):
31
+ def __init__(self, description, epilog, **kwargs):
32
+
33
+ BaseCLI.__init__(self, description, epilog, **kwargs)
34
+ self.parser.add_argument("--catalog", default=1, metavar="<1>", help="Catalog number. Default: 1")
35
+ self.parser.add_argument("--timeout", metavar="<seconds>",
36
+ help="Total number of seconds elapsed before the download is aborted.")
37
+ self.parser.add_argument("output_dir", metavar="<output dir>", help="Path to an output directory.")
38
+ self.parser.add_argument("envars", metavar="[key=value key=value ...]",
39
+ nargs=argparse.REMAINDER, action=KeyValuePairArgs, default={},
40
+ help="Variable length of whitespace-delimited key=value pair arguments used for "
41
+ "string interpolation in specific parts of the configuration file. "
42
+ "For example: key1=value1 key2=value2")
43
+
44
+ def main(self):
45
+ try:
46
+ args = self.parse_cli()
47
+ except ValueError as e:
48
+ sys.stderr.write(str(e))
49
+ return 2
50
+ if not args.quiet:
51
+ sys.stderr.write("\n")
52
+
53
+ try:
54
+ try:
55
+ ml_instance = DerivaML(args.hostname, args.catalog)
56
+ downloaded = self.execute()
57
+ sys.stdout.write("\n%s\n" % (json.dumps(downloaded)))
58
+ except ConnectionError as e:
59
+ raise DerivaDownloadError("Connection error occurred. %s" % format_exception(e))
60
+ except HTTPError as e:
61
+ if e.response.status_code == requests.codes.unauthorized:
62
+ raise DerivaDownloadAuthenticationError(
63
+ "The requested service requires authentication and a valid login session could "
64
+ "not be found for the specified host. Server responded: %s" % e)
65
+ elif e.response.status_code == requests.codes.forbidden:
66
+ raise DerivaDownloadAuthorizationError(
67
+ "A requested operation was forbidden. Server responded: %s" % e)
68
+ except (DerivaDownloadError, DerivaDownloadConfigurationError, DerivaDownloadAuthenticationError,
69
+ DerivaDownloadAuthorizationError, DerivaDownloadTimeoutError, DerivaDownloadBaggingError) as e:
70
+ sys.stderr.write(("\n" if not args.quiet else "") + format_exception(e))
71
+ if args.debug:
72
+ traceback.print_exc()
73
+ return 1
74
+ except:
75
+ sys.stderr.write("An unexpected error occurred.")
76
+ traceback.print_exc()
77
+ return 1
78
+ finally:
79
+ if not args.quiet:
80
+ sys.stderr.write("\n\n")
81
+ return 0
82
+
83
+
84
+ def do_stuff():
85
+ pass
86
+
87
+ def main(datasets: list[RID], model: list[RID], hostname: str, catalog_id: str):
88
+ my_url = DerivaML.github_url()
89
+ ml_instance = DerivaML(hostname, catalog_id)
90
+ ml_instance.lookup_workflow(my_url)
91
+ config = ExecutionConfiguration(
92
+ datasets=[DatasetSpec(rid=dataset,
93
+ version=ml_instance.dataset_version(dataset)) for dataset in datasets],
94
+ assets=model,
95
+ workflow= ml_instance.lookup_workflow(my_url)
96
+ )
97
+ execution = ml_instance.create_execution(config)
98
+ with execution as e:
99
+ do_stuff()
100
+ execution.upload_execution_outputs()
101
+
102
+ if __name__ == "__main__":
103
+ main(datasets, model, hostname, catalog_id)
deriva_ml/execution.py CHANGED
@@ -12,6 +12,7 @@ import os
12
12
  import shutil
13
13
  from datetime import datetime
14
14
  from pathlib import Path
15
+ import requests
15
16
  from tempfile import NamedTemporaryFile
16
17
  from typing import Iterable, Any, Optional
17
18
  from deriva.core import format_exception
@@ -28,7 +29,6 @@ from .deriva_definitions import (
28
29
  )
29
30
  from .deriva_ml_base import DerivaML, FeatureRecord
30
31
  from .dataset_aux_classes import DatasetSpec, DatasetVersion, VersionPart
31
- from .dataset import Dataset
32
32
  from .dataset_bag import DatasetBag
33
33
  from .execution_configuration import ExecutionConfiguration
34
34
  from .execution_environment import get_execution_environment
@@ -51,6 +51,12 @@ except ImportError: # Graceful fallback if IceCream isn't installed.
51
51
  ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
52
52
 
53
53
 
54
+ try:
55
+ from jupyter_server.serverapp import list_running_servers
56
+ except ImportError:
57
+ list_running_servers = lambda: []
58
+
59
+
54
60
  class Execution:
55
61
  """The Execution class is used to capture the context of an activity within DerivaML. While these are primarily
56
62
  computational, manual processes can be represented by an execution as well.
@@ -100,6 +106,7 @@ class Execution:
100
106
  self.configuration = configuration
101
107
  self._ml_object = ml_object
102
108
  self.start_time = None
109
+ self.stop_time = None
103
110
  self.status = Status.created
104
111
  self.uploaded_assets: list[Path] = []
105
112
 
@@ -221,8 +228,9 @@ class Execution:
221
228
  Returns:
222
229
  the location of the unpacked and validated dataset_table bag and the RID of the bag
223
230
  """
224
- ds = Dataset(self._ml_object.model, cache_dir=self._cache_dir)
225
- return ds.download_dataset_bag(dataset, execution_rid=self.execution_rid)
231
+ return self._ml_object.download_dataset_bag(
232
+ dataset, execution_rid=self.execution_rid
233
+ )
226
234
 
227
235
  @validate_call
228
236
  def update_status(self, status: Status, msg: str) -> None:
@@ -243,6 +251,35 @@ class Execution:
243
251
  ]
244
252
  )
245
253
 
254
+ def _create_notebook_checkpoint(self):
255
+ """Trigger a checkpoint creation using Jupyter's API."""
256
+ notebook_name = self._ml_object._notebook
257
+ servers = list_running_servers()
258
+ # Look for the server running this notebook.
259
+ root = Path("").absolute().parent.as_posix()
260
+ servers = list(list_running_servers())
261
+ # Jupyterhub seems to handle root_dir differently then server case.
262
+ server = (
263
+ servers
264
+ if len(servers) == 1
265
+ else [s for s in servers if s["root_dir"] == root]
266
+ )[0]
267
+ notebook_url = f"{server['url']}api/contents/{notebook_name}"
268
+
269
+ # Get notebook content
270
+ response = requests.get(
271
+ notebook_url, headers={"Authorization": f"Token {server['token']}"}
272
+ )
273
+ if response.status_code == 200:
274
+ notebook_content = response.json()["content"]
275
+ # Execution metadata cannot be in a directory, so map path into filename.
276
+ checkpoint_path = (
277
+ self.execution_metadata_path(ExecMetadataVocab.runtime_env.value)
278
+ / f"{notebook_name.as_posix().replace('/','_')}.checkpoint"
279
+ )
280
+ with open(checkpoint_path, "w", encoding="utf-8") as f:
281
+ json.dump(notebook_content, f)
282
+
246
283
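
Because execution metadata files cannot be nested in directories, the checkpoint name flattens the notebook path into a single filename. A small sketch of that mapping (hypothetical notebook path):

    from pathlib import Path

    notebook_name = Path("notebooks/train.ipynb")
    checkpoint_name = f"{notebook_name.as_posix().replace('/', '_')}.checkpoint"
    assert checkpoint_name == "notebooks_train.ipynb.checkpoint"
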
  def execution_start(self) -> None:
247
284
  """Start an execution, uploading status to catalog"""
248
285
 
@@ -252,11 +289,15 @@ class Execution:
252
289
 
253
290
  def execution_stop(self) -> None:
254
291
  """Finish the execution and update the duration and status of execution."""
255
- duration = datetime.now() - self.start_time
292
+ self.stop_time = datetime.now()
293
+ duration = self.stop_time - self.start_time
256
294
  hours, remainder = divmod(duration.total_seconds(), 3600)
257
295
  minutes, seconds = divmod(remainder, 60)
258
296
  duration = f"{round(hours, 0)}H {round(minutes, 0)}min {round(seconds, 4)}sec"
259
297
 
298
+ if self._ml_object._notebook:
299
+ self._create_notebook_checkpoint()
300
+
260
301
  self.update_status(Status.completed, "Algorithm execution ended.")
261
302
  self._ml_object.pathBuilder.schemas[self._ml_object.ml_schema].Execution.update(
262
303
  [{"RID": self.execution_rid, "Duration": duration}]
deriva_ml/execution_configuration.py CHANGED
@@ -33,18 +33,18 @@ class Workflow(BaseModel):
33
33
  version: Optional[str] = None
34
34
  description: Optional[str] = ""
35
35
  rid: Optional[RID] = None
36
+ checksum: Optional[str]
37
+
36
38
 
37
39
 
38
40
  class ExecutionConfiguration(BaseModel):
39
41
  """Define the parameters that are used to configure a specific execution.
40
42
 
41
43
  Attributes:
42
- datasets: List of dataset_table RIDS, MINIDS for datasets to be downloaded prior to execution. By default,
43
- all the datasets are materialized. However, if the assets associated with a dataset_table are not
44
- needed, a dictionary that defines the rid and the materialization parameter for the
45
- download_dataset_bag method can be specified, e.g. datasets=[{'rid': RID, 'materialize': True}].
44
+ datasets: List of dataset specifications, each giving the dataset RID, its version, and whether the dataset
45
+ should be materialized.
46
46
  assets: List of assets to be downloaded prior to execution. The values must be RIDs in an asset table
47
- workflow: A workflow instance. Must have a name, URI to the workflow instance, and a type.
47
+ workflow: The RID of a workflow instance. The workflow must have a name, a URI to the workflow instance, and a type.
48
48
  description: A description of the execution. Can use Markdown format.
49
49
  """
50
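
A hedged sketch of building a configuration with the revised fields (host, catalog, RIDs, and the workflow URL are hypothetical):

    from deriva_ml import DerivaML, ExecutionConfiguration, DatasetSpec

    ml_instance = DerivaML("example.derivacloud.org", "1")
    workflow_rid = ml_instance.lookup_workflow("https://github.com/org/repo/blob/abc123/train.py")
    config = ExecutionConfiguration(
        datasets=[DatasetSpec(rid="1-ABC1", version=ml_instance.dataset_version("1-ABC1"))],
        assets=["1-MODEL"],            # RIDs in an asset table
        workflow=workflow_rid,
        description="Example execution configuration",
    )
    execution = ml_instance.create_execution(config)
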
50
 
deriva_ml/upload.py CHANGED
@@ -70,8 +70,11 @@ exec_asset_regex = (
70
70
  exec_metadata_dir_regex = (
71
71
  exec_dir_regex + r"/execution-metadata/(?P<execution_metadata_type>[-\w]+)"
72
72
  )
73
+
74
+ # Execution metadata filenames may have more than one suffix (e.g. notebook.ipynb.checkpoint).
73
75
  exec_metadata_regex = (
74
- exec_metadata_dir_regex + r"/(?P<filename>[-\w]+)[.](?P<file_ext>[a-z0-9]*)$"
76
+ exec_metadata_dir_regex
77
+ + r"/(?P<filename>[-\w]+([.][\w]+)*)[.](?P<file_ext>[a-z0-9]*)$"
75
78
  )
76
79
  feature_dir_regex = exec_dir_regex + r"/feature"
77
80
  feature_table_dir_regex = (
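
A standalone sketch of the filename portion of the updated pattern, showing that a metadata file name may now carry multiple suffixes (for example, the notebook checkpoint files written by Execution):

    import re

    name_part = re.compile(r"(?P<filename>[-\w]+([.][\w]+)*)[.](?P<file_ext>[a-z0-9]*)$")
    m = name_part.search("train.ipynb.checkpoint")
    assert m.group("filename") == "train.ipynb" and m.group("file_ext") == "checkpoint"
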
deriva_ml-1.8.2.dist-info/METADATA → deriva_ml-1.8.4.dist-info/METADATA RENAMED
@@ -1,16 +1,18 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: deriva-ml
3
- Version: 1.8.2
3
+ Version: 1.8.4
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
6
  Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
- Requires-Dist: deriva~=1.7.6
9
+ Requires-Dist: deriva~=1.7.7
10
10
  Requires-Dist: pandas
11
11
  Requires-Dist: regex~=2024.7.24
12
12
  Requires-Dist: pydantic>=2.10.6
13
13
  Requires-Dist: semver>3.0.0
14
+ Requires-Dist: setuptools-git-versioning<3,>=2.0
15
+ Requires-Dist: nbstripout
14
16
 
15
17
  Deriva-ML is a python libary to simplify the process of creating and executing reproducible machine learning workflows
16
18
  using a deriva catalog.
deriva_ml-1.8.2.dist-info/RECORD → deriva_ml-1.8.4.dist-info/RECORD RENAMED
@@ -1,20 +1,21 @@
1
- deriva_ml/VERSION.py,sha256=d6593s-XBNvVxri9lr2qLUDZQ3Zk3-VXHEwdb4pj8qA,22
1
+ deriva_ml/VERSION.py,sha256=8kdJa8mgK7VES73y02oBbzwoXZCUs42GzbJ4UU-L_3I,22
2
2
  deriva_ml/__init__.py,sha256=0PHNB8gRDALLtaffRmU7wCUgWbRHVQZcjuPJxMLNEco,856
3
3
  deriva_ml/database_model.py,sha256=uhoyVyd8MQmY8J9ovCH8fjxhZDxxXNkdJyYdeyEGPXA,13898
4
- deriva_ml/dataset.py,sha256=5STHbjWomTCPl8isdlcDgLk_K9DLCfACajBAreUAXTQ,58272
4
+ deriva_ml/dataset.py,sha256=xC6QPUp4MZcJiEnOEU3NnzoLBL9RcJWtPTyzIQP0Ivw,60666
5
5
  deriva_ml/dataset_aux_classes.py,sha256=YxjQnu2kS9kK_f8bGqhmgE6ty9GNeitCxfvReT9vaM0,6537
6
6
  deriva_ml/dataset_bag.py,sha256=e6IHv3saZUnZRfl0EjfnlV2NnmPeOagYYv3PuZqS1l0,11501
7
7
  deriva_ml/demo_catalog.py,sha256=xQPhFlflqwJskNQrQ-jdBSnGzBm2-aONBgcRxfsdNKM,11045
8
- deriva_ml/deriva_definitions.py,sha256=MGl29ogCzqrlRilMhSuR5tECo4NSHP4CLbJAXRtPH6E,8914
9
- deriva_ml/deriva_ml_base.py,sha256=ShDZlG9F4XrGRUcUINT3bb_P_UdvV1FqSnnPsjGTCLU,36443
8
+ deriva_ml/deriva_definitions.py,sha256=pZLPoUxiuJ-uGglmQ6sF9oVXsSUuOnPEqywoec78XNM,8893
9
+ deriva_ml/deriva_ml_base.py,sha256=3iA1OaPU-6Q7ixt87uDmPuHHZ5P-FyHvX0AKfi4tKp0,42224
10
+ deriva_ml/deriva_ml_execute.py,sha256=y_rGjc97eidBuzy-AaQGe93vuTbWbkNkK9rpReqV0IY,4433
10
11
  deriva_ml/deriva_model.py,sha256=LV3FjIhIlz13ckZSmu0aOJhT9EVE0-M9oVMudfkxb0g,12004
11
- deriva_ml/execution.py,sha256=UcXWY1W5Mt_Yzuayd3Pjd-lKzLlMV5QXZFcLvE6Lt0E,28390
12
- deriva_ml/execution_configuration.py,sha256=nMeaG1qYdIgu4BV5atSUlcL8VZ3O6ohGY5iBhtD_LQ4,3700
12
+ deriva_ml/execution.py,sha256=c7dbk4HvEh7E4BLlBrf_azUxxhRSUmLQa_6G8t8OKVY,29929
13
+ deriva_ml/execution_configuration.py,sha256=bjnZwXN6M7YPy5dFQwoGEBU8YjhQRSe1FW0rL0V9TaM,3422
13
14
  deriva_ml/execution_environment.py,sha256=bCRKrCELDbGQDo7_FKfw7e8iMzVjSRZK3baKkqH5-_0,3264
14
15
  deriva_ml/feature.py,sha256=7e8WYPCfJSrGxJh9oUTduYSnB5ekybRhXa_0HIigS_w,5459
15
16
  deriva_ml/history.py,sha256=qTDLDs8Ow_6r7mDO0gZm0Fg81SWKOAgtCU5pzZoDRgM,2828
16
17
  deriva_ml/test_functions.py,sha256=-eqLHjjCQCLBNAr1ofbZekNiCOfMISSACRxT_YHER8I,4396
17
- deriva_ml/upload.py,sha256=HCOChW6bALW_gt0sWUs_81bNPsb72TNs4o0FQsGSLM4,22222
18
+ deriva_ml/upload.py,sha256=CKtT-gBln3pnAll9TFaiPhFSHC-bzg9oE4ruh_OSOqY,22270
18
19
  deriva_ml/build/lib/schema_setup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
20
  deriva_ml/build/lib/schema_setup/alter_annotation.py,sha256=pkwk0WystN69JfAFK4iBJZAZVQKbRs-gN9IFYuS9rfg,1739
20
21
  deriva_ml/build/lib/schema_setup/annotation_temp.py,sha256=Euygu8wNklZFUbR6mz-pDWJemlzdsIn9d6j0f6fCfgE,9102
@@ -26,9 +27,9 @@ deriva_ml/schema_setup/annotations.py,sha256=Uogm9YkRtoKSdgfQlICqRywbCATppwBO-Xr
26
27
  deriva_ml/schema_setup/create_schema.py,sha256=jwziMWJPbjRgjiRBT-KtidnXI8YNEFO74A9fwfptjHY,10626
27
28
  deriva_ml/schema_setup/policy.json,sha256=77sf0Imy6CAQV0_VwwbA56_KROJ05WXsvT-Wjtkk538,1633
28
29
  deriva_ml/schema_setup/table_comments_utils.py,sha256=-2_ubEpoH7ViLVb-ZfW9wZbQ26DTKNgjkCABMzGu4i4,2140
29
- deriva_ml-1.8.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
30
- deriva_ml-1.8.2.dist-info/METADATA,sha256=DnSPqOt32ddlxTwuxGo9iL3DSbUKLIiMitRDMbxZcYQ,556
31
- deriva_ml-1.8.2.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
32
- deriva_ml-1.8.2.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
33
- deriva_ml-1.8.2.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
34
- deriva_ml-1.8.2.dist-info/RECORD,,
30
+ deriva_ml-1.8.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ deriva_ml-1.8.4.dist-info/METADATA,sha256=F14U7NvY310NBB4wGp3-OVmAUXvMy_sDNuS1ZmRjwek,631
32
+ deriva_ml-1.8.4.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
33
+ deriva_ml-1.8.4.dist-info/entry_points.txt,sha256=ZiOvrYj022x544TQwi018ujeHRRDahNmwJnzn5ThacM,242
34
+ deriva_ml-1.8.4.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
35
+ deriva_ml-1.8.4.dist-info/RECORD,,
deriva_ml-1.8.2.dist-info/WHEEL → deriva_ml-1.8.4.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.0.0)
2
+ Generator: setuptools (76.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5