deriva-ml 1.14.47__py3-none-any.whl → 1.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ """A module defining the DatasetLike protocol for dataset operations.
2
+
3
+ This module contains the definition of the DatasetLike protocol, which
4
+ provides an interface for datasets to implement specific functionality related
5
+ to listing dataset children. It is particularly useful for ensuring type
6
+ compatibility for objects that mimic datasets in their behavior.
7
+
8
+ Classes:
9
+ DatasetLike: A protocol that specifies methods required for dataset-like
10
+ objects.
11
+ """
12
+ from typing import Protocol, runtime_checkable
13
+
14
+ from deriva_ml.core.definitions import RID
15
+
16
+
17
+ @runtime_checkable
18
+ class DatasetLike(Protocol):
19
+ def list_dataset_children(self, dataset_rid: RID, recurse: bool = False) -> list[RID]: ...
deriva_ml/run_notebook.py CHANGED
@@ -3,14 +3,13 @@
3
3
  import json
4
4
  import os
5
5
  import tempfile
6
- from datetime import datetime
7
6
  from pathlib import Path
8
7
 
9
8
  import nbformat
10
9
  import papermill as pm
11
- import regex as re
12
10
  import yaml
13
11
  from deriva.core import BaseCLI
12
+ from jupyter_client.kernelspec import KernelSpecManager
14
13
  from nbconvert import MarkdownExporter
15
14
 
16
15
  from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
@@ -44,13 +43,6 @@ class DerivaMLRunNotebookCLI(BaseCLI):
44
43
  help="Display logging output from notebook.",
45
44
  )
46
45
 
47
- self.parser.add_argument(
48
- "--catalog",
49
- metavar="<1>",
50
- default=1,
51
- help="Catalog number. Default 1",
52
- )
53
-
54
46
  self.parser.add_argument(
55
47
  "--parameter",
56
48
  "-p",
@@ -61,7 +53,13 @@ class DerivaMLRunNotebookCLI(BaseCLI):
61
53
  help="Provide a parameter name and value to inject into the notebook.",
62
54
  )
63
55
 
64
- self.parser.add_argument("--kernel", "-k", nargs=1, help="Name of kernel to run..", default=None)
56
+ self.parser.add_argument(
57
+ "--kernel",
58
+ "-k",
59
+ type=str,
60
+ help="Name of kernel to run..",
61
+ default=self._find_kernel_for_venv(),
62
+ )
65
63
 
66
64
  @staticmethod
67
65
  def _coerce_number(val: str):
@@ -100,26 +98,50 @@ class DerivaMLRunNotebookCLI(BaseCLI):
100
98
  print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
101
99
  exit(1)
102
100
 
103
- os.environ["DERIVA_HOST"] = args.host
104
- os.environ["DERIVA_CATALOG"] = args.catalog
105
-
106
101
  # Create a workflow instance for this specific version of the script.
107
102
  # Return an existing workflow if one is found.
108
103
  notebook_parameters = pm.inspect_notebook(notebook_file)
104
+
109
105
  if args.inspect:
110
106
  for param, value in notebook_parameters.items():
111
107
  print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
112
108
  return
113
109
  else:
114
- notebook_parameters = (
115
- {k: v["default"] for k, v in notebook_parameters.items()}
116
- | {"host": args.host, "hostname": args.host, "catalog_id": args.catalog, "catalog": args.catalog}
117
- | parameters
118
- )
119
- print(f"Running notebook {notebook_file.name} with parameters:")
120
- for param, value in notebook_parameters.items():
121
- print(f" {param}:{value}")
122
- self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel[0], log=args.log_output)
110
+ notebook_parameters = {k: v["default"] for k, v in notebook_parameters.items()} | parameters
111
+ self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
112
+
113
+ @staticmethod
114
+ def _find_kernel_for_venv() -> str | None:
115
+ """
116
+ Return the name of an existing Jupyter kernel corresponding
118
+ to the currently active Python virtual environment.
119
+
120
+ Notes
121
+ -----
122
+ The virtual environment is taken from the VIRTUAL_ENV environment
123
+ variable; None is returned when no environment is active.
124
+
125
+ Returns
126
+ -------
127
+ str | None
128
+ The kernel name if a matching kernel is found, or None otherwise.
128
+ """
129
+ venv = os.environ.get("VIRTUAL_ENV")
130
+ if not venv:
131
+ return None
132
+ venv_path = Path(venv).resolve()
133
+ ksm = KernelSpecManager()
134
+ for name, spec in ksm.get_all_specs().items():
135
+ kernel_json = spec.get("spec", {})
136
+ argv = kernel_json.get("argv", [])
137
+ # check for python executable path inside argv
138
+ for arg in argv:
139
+ try:
140
+ if Path(arg).resolve() == venv_path.joinpath("bin", "python").resolve():
141
+ return name
142
+ except Exception:
143
+ continue
144
+ return None
123
145
 
124
146
  def run_notebook(self, notebook_file: Path, parameters, kernel=None, log=False):
125
147
  url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
@@ -127,8 +149,9 @@ class DerivaMLRunNotebookCLI(BaseCLI):
127
149
  os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
128
150
  os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
129
151
  with tempfile.TemporaryDirectory() as tmpdirname:
130
- print(f"Running notebook {notebook_file.name} with parameters:")
131
152
  notebook_output = Path(tmpdirname) / Path(notebook_file).name
153
+ execution_rid_path = Path(tmpdirname) / "execution_rid.json"
154
+ os.environ["DERIVA_ML_SAVE_EXECUTION_RID"] = execution_rid_path.as_posix()
132
155
  pm.execute_notebook(
133
156
  input_path=notebook_file,
134
157
  output_path=notebook_output,
@@ -137,22 +160,19 @@ class DerivaMLRunNotebookCLI(BaseCLI):
137
160
  log_output=log,
138
161
  )
139
162
  print(f"Notebook output saved to {notebook_output}")
140
- catalog_id = execution_rid = None
141
- with Path(notebook_output).open("r") as f:
142
- for line in f:
143
- if m := re.search(
144
- r"Execution RID: https://(?P<host>.*)/id/(?P<catalog_id>.*)/(?P<execution_rid>[\w-]+)",
145
- line,
146
- ):
147
- hostname = m["host"]
148
- catalog_id = m["catalog_id"]
149
- execution_rid = m["execution_rid"]
150
- if not execution_rid:
163
+ with execution_rid_path.open("r") as f:
164
+ execution_config = json.load(f)
165
+
166
+ if not execution_config:
151
167
  print("Execution RID not found.")
152
168
  exit(1)
153
169
 
170
+ execution_rid = execution_config["execution_rid"]
171
+ hostname = execution_config["hostname"]
172
+ catalog_id = execution_config["catalog_id"]
173
+ workflow_rid = execution_config["workflow_rid"]
154
174
  ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
155
- workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
175
+ workflow_rid = ml_instance.retrieve_rid(execution_config["execution_rid"])["Workflow"]
156
176
 
157
177
  execution = Execution(
158
178
  configuration=ExecutionConfiguration(workflow=workflow_rid),
@@ -183,21 +203,6 @@ class DerivaMLRunNotebookCLI(BaseCLI):
183
203
  file_name=notebook_output_md,
184
204
  asset_types=ExecAssetType.notebook_output,
185
205
  )
186
- execution.asset_file_path(
187
- asset_name=MLAsset.execution_asset,
188
- file_name=notebook_output_md,
189
- asset_types=ExecAssetType.notebook_output,
190
- )
191
- print("parameter....")
192
-
193
- parameter_file = execution.asset_file_path(
194
- asset_name=MLAsset.execution_asset,
195
- file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
196
- asset_types=ExecAssetType.input_file.value,
197
- )
198
-
199
- with Path(parameter_file).open("w") as f:
200
- json.dump(parameters, f)
201
206
  execution.upload_execution_outputs()
202
207
 
203
208
  print(ml_instance.cite(execution_rid))
@@ -1,4 +1,5 @@
1
1
  import argparse
2
+ import sys
2
3
 
3
4
  from deriva.core.ermrest_model import Model, Table
4
5
  from deriva.core.utils.core_utils import tag as deriva_tags
@@ -183,10 +184,10 @@ def catalog_annotation(model: DerivaModel) -> None:
183
184
 
184
185
  def asset_annotation(asset_table: Table):
185
186
  """Generate annotations for an asset table.
186
-
187
+
187
188
  Args:
188
189
  asset_table: The Table object representing the asset table.
189
-
190
+
190
191
  Returns:
191
192
  A dictionary containing the annotations for the asset table.
192
193
  """
@@ -316,7 +317,8 @@ def generate_annotation(model: Model, schema: str) -> dict:
316
317
  },
317
318
  {
318
319
  "source": [
319
- {"inbound": [schema, "Execution_Metadata_Execution_fkey"]},
320
+ {"inbound": [schema, "Execution_Metadata_Execution_Execution_fkey"]},
321
+ {"outbound": [schema, "Execution_Metadata_Execution_Execution_Metadata_fkey"]},
320
322
  "RID",
321
323
  ],
322
324
  "markdown_name": "Execution Metadata",
@@ -453,9 +455,9 @@ def generate_annotation(model: Model, schema: str) -> dict:
453
455
 
454
456
  def main():
455
457
  """Main entry point for the annotations CLI.
456
-
458
+
457
459
  Applies annotations to the ML schema based on command line arguments.
458
-
460
+
459
461
  Returns:
460
462
  None. Executes the CLI.
461
463
  """
deriva_ml/test.py ADDED
@@ -0,0 +1,94 @@
1
+ from typing import Any, Type
2
+ from deriva_ml import RID
3
+ from sqlalchemy import UniqueConstraint, inspect
4
+ from collections import defaultdict
5
+ from graphlib import CycleError, TopologicalSorter
6
+
7
+ def _prepare_wide_table(self, dataset, dataset_rid: RID, include_tables: list[str]) -> tuple:
8
+ """
9
+ Generates the details needed to build a wide (denormalized) table of a dataset.
10
+
11
+ Args:
12
+ dataset: Dataset-like object whose children are gathered via list_dataset_children.
13
+ dataset_rid (RID): RID of the dataset to denormalize.
14
+ include_tables (list[str]): Table names to include in the denormalized dataset.
15
+ Returns:
16
+ tuple: (join_tables, join_conditions, denormalized_columns, dataset_rids, dataset_element_tables).
17
+ """
18
+
19
+ # Skip over tables that we don't want to include in the denormalized dataset.
20
+ # Also, strip off the Dataset/Dataset_X part of the path so we don't include dataset columns in the denormalized
21
+ # table.
22
+ include_tables = set(include_tables)
23
+ for t in include_tables:
24
+ # Check to make sure the table is in the catalog.
25
+ _ = self.name_to_table(t)
26
+
27
+ table_paths = [
28
+ path
29
+ for path in self._schema_to_paths()
30
+ if path[-1].name in include_tables and include_tables.intersection({p.name for p in path})
31
+ ]
32
+ paths_by_element = defaultdict(list)
33
+ for p in table_paths:
34
+ paths_by_element[p[2].name].append(p)
35
+
36
+ # Get the names of all of the tables that can be dataset elements.
37
+ dataset_element_tables = {e.name for e in self.list_dataset_element_types() if e.schema.name == self.domain_schema}
38
+
39
+ skip_columns = {"RCT", "RMT", "RCB", "RMB"}
40
+ join_conditions = {}
41
+ join_tables = {}
42
+ for element_table, paths in paths_by_element.items():
43
+ graph = {}
44
+ for path in paths:
45
+ for left, right in zip(path[0:], path[1:]):
46
+ graph.setdefault(left.name, set()).add(right.name)
47
+
48
+ # Now let's remove any cycles that we may have in the graph.
49
+ # We will use a topological sort to find the order in which we need to join the tables.
50
+ # If we find a cycle, we will remove the table from the graph and splice in an additional ON clause.
51
+ # We will then repeat the process until there are no cycles.
52
+ graph_has_cycles = True
53
+ element_join_tables = []
54
+ element_join_conditions = {}
55
+ while graph_has_cycles:
56
+ try:
57
+ ts = TopologicalSorter(graph)
58
+ element_join_tables = list(reversed(list(ts.static_order())))
59
+ graph_has_cycles = False
60
+ except CycleError as e:
61
+ cycle_nodes = e.args[1]
62
+ if len(cycle_nodes) > 3:
63
+ raise DerivaMLException(f"Unexpected cycle found when normalizing dataset {cycle_nodes}")
64
+ # Remove cycle from graph and splice in additional ON constraint.
65
+ graph[cycle_nodes[1]].remove(cycle_nodes[0])
66
+
67
+ # The Dataset_Version table is a special case as it points to dataset and dataset to version.
68
+ if "Dataset_Version" in join_tables:
69
+ element_join_tables.remove("Dataset_Version")
70
+
71
+ for path in paths:
72
+ for left, right in zip(path[0:], path[1:]):
73
+ if right.name == "Dataset_Version":
74
+ # The Dataset_Version table is a special case as it points to dataset and dataset to version.
75
+ continue
76
+ if element_join_tables.index(right.name) < element_join_tables.index(left.name):
77
+ continue
78
+ table_relationship = self._table_relationship(left, right)
79
+ element_join_conditions.setdefault(right.name, set()).add((table_relationship[0], table_relationship[1]))
80
+ join_tables[element_table] = element_join_tables
81
+ join_conditions[element_table] = element_join_conditions
82
+ # Get the list of columns that will appear in the final denormalized dataset.
83
+ denormalized_columns = [
84
+ (table_name, c.name)
85
+ for table_name in join_tables
86
+ if not self.is_association(table_name) # Don't include association columns in the denormalized view.
87
+ for c in self.name_to_table(table_name).columns
88
+ if (not include_tables or table_name in include_tables) and (c.name not in skip_columns)
89
+ ]
90
+
91
+ # List of dataset ids to include in the denormalized view.
92
+ dataset_rids = [dataset_rid] + dataset.list_dataset_children(recurse=True)
93
+ return join_tables, join_conditions, denormalized_columns, dataset_rids, dataset_element_tables
94
+
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deriva-ml
3
- Version: 1.14.47
3
+ Version: 1.17.0
4
4
  Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
5
5
  Author-email: ISRD <isrd-dev@isi.edu>
6
- Requires-Python: >=3.10
6
+ Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
8
  License-File: LICENSE
9
9
  Requires-Dist: bump-my-version
@@ -12,15 +12,18 @@ Requires-Dist: deriva~=1.7.10
12
12
  Requires-Dist: deepdiff
13
13
  Requires-Dist: nbconvert
14
14
  Requires-Dist: pandas
15
- Requires-Dist: regex~=2024.7.24
15
+ Requires-Dist: pip-system-certs
16
16
  Requires-Dist: pydantic>=2.11
17
- Requires-Dist: semver>3.0.0
18
- Requires-Dist: setuptools>=64
19
- Requires-Dist: setuptools-scm>=8.0
20
- Requires-Dist: nbstripout
21
17
  Requires-Dist: papermill
22
18
  Requires-Dist: pandas-stubs==2.2.3.250527
23
19
  Requires-Dist: pyyaml
20
+ Requires-Dist: regex~=2024.7.24
21
+ Requires-Dist: semver>3.0.0
22
+ Requires-Dist: setuptools>=80
23
+ Requires-Dist: setuptools-scm>=8.0
24
+ Requires-Dist: nbstripout
25
+ Requires-Dist: hydra_zen
26
+ Requires-Dist: SQLAlchemy
24
27
  Dynamic: license-file
25
28
 
26
29
  # DerivaML
@@ -0,0 +1,45 @@
1
+ deriva_ml/.DS_Store,sha256=gb-f5IXVed_gS5Be1Z6WxCYjrI_r5SdblvfFpIOY4ro,8196
2
+ deriva_ml/__init__.py,sha256=YCG7P4PUtO_b-aIIYb4KhKHcfnb8Wz_YeAL-c0HiQlA,1775
3
+ deriva_ml/bump_version.py,sha256=eN2G5G_OeiuFxhOdjjwfxD8Rmv6dFvzIm0y_1x4Mif4,4020
4
+ deriva_ml/demo_catalog.py,sha256=FfXPlDfzy29K9g2Fr_KmYyRhmxP2eSaqm8_Xcji8fUM,15352
5
+ deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
6
+ deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
7
+ deriva_ml/run_notebook.py,sha256=_pds1q3WcfWqhCBqKeznbwSv5n7OND8FkL6JQ2Jkfmc,8093
8
+ deriva_ml/test.py,sha256=BqmQXR9IyQP9h8pWttk0dzyJod2CwcfYbSUZS-Q5r4k,4460
9
+ deriva_ml/core/__init__.py,sha256=Ko8GsWc7K_eDFW0-GaNS6gOWYP8cWHWir-ChSQaHntE,856
10
+ deriva_ml/core/base.py,sha256=KzZW310J0YmvCUhuCWxd42LNCM_JSzR__ObtT7zgcsU,62525
11
+ deriva_ml/core/config.py,sha256=dF4rOLFmbk1DEkQimqbiH4pC519nRZWpwKItARNMiZ4,2244
12
+ deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
13
+ deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
14
+ deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
15
+ deriva_ml/core/ermrest.py,sha256=N0IJ3TE87jElaBChEIo5AFDTr0SIrb6F90yiimRfPr4,10182
16
+ deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,934
17
+ deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
18
+ deriva_ml/dataset/__init__.py,sha256=wTCQaWGfRYEiUoOOxerKSpkbl1T5YFhoCyemlxGTk8k,283
19
+ deriva_ml/dataset/aux_classes.py,sha256=ojqe7gyK4KQVz_xfIillXS_HJ1PMsyr47pb2tFOXO_c,7855
20
+ deriva_ml/dataset/dataset.py,sha256=d860WuCL0-Pz6TyRpGVzhpPWDMco01-I5LT4dZjYxsQ,64728
21
+ deriva_ml/dataset/dataset_bag.py,sha256=ori3BuYVqfeHkVCjNSKuZh7oMdC6uufsszicpTPODiw,19944
22
+ deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
23
+ deriva_ml/dataset/upload.py,sha256=n1aXSbOx1hghCDxuF8yf03jZmOLMueXL-rSnQMrfHq0,16535
24
+ deriva_ml/execution/__init__.py,sha256=5kKpPwQbxhmRn7Npz7DpavuCxYwCQaDdl4-6z62hbds,705
25
+ deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
26
+ deriva_ml/execution/execution.py,sha256=hNBfYnqXK4PmNS2wxbJ5oYzjDszjaiGHo8d3uxmIgPk,46210
27
+ deriva_ml/execution/execution_configuration.py,sha256=RT0x9n0uyJgEsrLCUTu16nAUJN7X-XLDvfezln0PTDQ,5775
28
+ deriva_ml/execution/workflow.py,sha256=rTlspICp2Q6prUwPCeukjhO64rbcJivcFs4zH60B16U,13906
29
+ deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
+ deriva_ml/model/catalog.py,sha256=O6_Ll4Uxg6DyxoBXT9P9CPTt9jx1guVTeX1L3KW1A5c,19645
31
+ deriva_ml/model/database.py,sha256=BG5FSisl9tWTBnf5k9dNnijOIDyCUDeRhN_inkmIqTw,31132
32
+ deriva_ml/protocols/dataset.py,sha256=1TyaT--89Elcs-nCvVyJxUj4cDaLztZOuSOzzj1cBMk,699
33
+ deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
34
+ deriva_ml/schema/annotations.py,sha256=CMcRqYUlyW8iLCYp6sYJsncaRNtp4kFKoxcg-i-t-50,18302
35
+ deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
36
+ deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
37
+ deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
38
+ deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
39
+ deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
40
+ deriva_ml-1.17.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
41
+ deriva_ml-1.17.0.dist-info/METADATA,sha256=gvz8ApFj8xylH1r4Nr-X_QiHChj6wRNJE7pLzI2sB8E,1272
42
+ deriva_ml-1.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ deriva_ml-1.17.0.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
44
+ deriva_ml-1.17.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
45
+ deriva_ml-1.17.0.dist-info/RECORD,,
@@ -1,44 +0,0 @@
1
- from datetime import datetime, timezone
2
- from typing import TYPE_CHECKING, Any, Sequence
3
-
4
- if TYPE_CHECKING:
5
- from deriva_ml.model.database import DatabaseModel
6
-
7
- try:
8
- from icecream import ic
9
- except ImportError: # Graceful fallback if IceCream isn't installed.
10
- ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a) # noqa
11
-
12
-
13
- class SQLMapper:
14
- def __init__(self, database: "DatabaseModel", table: str) -> None:
15
- table_name = database.normalize_table_name(table)
16
- schema, table = table_name.split(":")
17
-
18
- with database.dbase as dbase:
19
- self.col_names = [c[1] for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()]
20
-
21
- self.boolean_columns = [
22
- self.col_names.index(c.name)
23
- for c in database.model.schemas[schema].tables[table].columns
24
- if c.type.typename == "boolean"
25
- ]
26
- self.time_columns = [
27
- self.col_names.index(c.name)
28
- for c in database.model.schemas[schema].tables[table].columns
29
- if c.type.typename in ["ermrest_rct", "ermrest_rmt"]
30
- ]
31
-
32
- def _map_value(self, idx: int, v: Any) -> Any:
33
- """
34
- Return a new value based on `data` where, for each index in `idxs`,
35
- """
36
- tf_map = {"t": True, "f": False}
37
- if idx in self.boolean_columns:
38
- return tf_map.get(v, v)
39
- if idx in self.time_columns:
40
- return datetime.strptime(v, "%Y-%m-%d %H:%M:%S.%f+00").replace(tzinfo=timezone.utc).isoformat()
41
- return v
42
-
43
- def transform_tuple(self, data: Sequence[Any]) -> Any:
44
- return dict(zip(self.col_names, tuple(self._map_value(i, v) for i, v in enumerate(data))))
@@ -1,42 +0,0 @@
1
- deriva_ml/__init__.py,sha256=_aMdxGG4mRTcXodLZLNpXqH8v5uqMbqFUryE9KqNSB8,1158
2
- deriva_ml/bump_version.py,sha256=eN2G5G_OeiuFxhOdjjwfxD8Rmv6dFvzIm0y_1x4Mif4,4020
3
- deriva_ml/demo_catalog.py,sha256=JjPAIac_hKPh5krEhGJydjXquRnivi7kQoR8W4Khp-s,14928
4
- deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
5
- deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
6
- deriva_ml/run_notebook.py,sha256=QRO_CK9Q9qt_n-c0rxGdIRyTHjGOuZxt-wj0WQTnaAM,8171
7
- deriva_ml/core/__init__.py,sha256=V_i90pc5PB1F4UdOO6DZWzpEFaZDTaPRU-EzKXQ19eI,787
8
- deriva_ml/core/base.py,sha256=LI_ZLpVJwWx4DW2Wo7luALQauQ3xhBxFYHSKDAfNsag,61649
9
- deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
10
- deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
11
- deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
12
- deriva_ml/core/ermrest.py,sha256=N0IJ3TE87jElaBChEIo5AFDTr0SIrb6F90yiimRfPr4,10182
13
- deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,934
14
- deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
15
- deriva_ml/dataset/__init__.py,sha256=ukl2laJqa9J2AVqb4zlpIYc-3RaAlfRR33NMIQaoNrQ,104
16
- deriva_ml/dataset/aux_classes.py,sha256=9mZAln7_rrzaRbKhKA6dJOp3xeD6dHOC9NXOtJKROo4,6933
17
- deriva_ml/dataset/dataset.py,sha256=B9QBFgcW1fCEseBV3FcgckPSrJyixEqeoG80mp__CfI,64472
18
- deriva_ml/dataset/dataset_bag.py,sha256=mPIZRX5aTbVRcJbCFtdkmlnexquF8NE-onbVK_8IxVk,14224
19
- deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
20
- deriva_ml/dataset/upload.py,sha256=i_7KLfRSd2-THqZ1aG2OFAFGoyb8dJBCZZ5t1ftrtMQ,16429
21
- deriva_ml/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
23
- deriva_ml/execution/execution.py,sha256=NJjjrxGsedv0zoe-T-LxfO_5UG83KOHaxU3SY5EJ0QQ,44928
24
- deriva_ml/execution/execution_configuration.py,sha256=Rw4VWkBCZN9yatvSKdTqEWTfu470lpcVKfHFR0uN0jI,6248
25
- deriva_ml/execution/workflow.py,sha256=7CwPrgs3FKQHiEVus0PpK9w5hVKLKZnCrlu_nT8GFe8,13604
26
- deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
- deriva_ml/model/catalog.py,sha256=dzTBcRlqgEVkPY32AUax_iu75RgFiT4Pu5au7rmrv8k,14068
28
- deriva_ml/model/database.py,sha256=SBkYFf0qwbGmvL0Xtn_n5DCz4roGfrhuYrM8G69Cy9Y,14837
29
- deriva_ml/model/sql_mapper.py,sha256=_0QsJEVSgSPtxrWKSgjfPZCQ1aMVcjR_Tk2OxLhWEvY,1696
30
- deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
31
- deriva_ml/schema/annotations.py,sha256=TuQ3vWFnK0160fRmtvsCkHx9qAcRa63MSyERB4x5a98,18197
32
- deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
33
- deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
34
- deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
35
- deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
36
- deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
37
- deriva_ml-1.14.47.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
38
- deriva_ml-1.14.47.dist-info/METADATA,sha256=7kRaVpheUZqLTe82Q9KIAovS2gkiBE7KItSk67nQU9U,1190
39
- deriva_ml-1.14.47.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
- deriva_ml-1.14.47.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
41
- deriva_ml-1.14.47.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
42
- deriva_ml-1.14.47.dist-info/RECORD,,