deriva-ml 1.14.47__py3-none-any.whl → 1.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deriva_ml/.DS_Store +0 -0
- deriva_ml/__init__.py +59 -30
- deriva_ml/core/__init__.py +2 -2
- deriva_ml/core/base.py +28 -16
- deriva_ml/core/config.py +67 -0
- deriva_ml/dataset/__init__.py +10 -2
- deriva_ml/dataset/aux_classes.py +31 -2
- deriva_ml/dataset/dataset.py +7 -5
- deriva_ml/dataset/dataset_bag.py +214 -106
- deriva_ml/dataset/upload.py +7 -4
- deriva_ml/demo_catalog.py +17 -3
- deriva_ml/execution/__init__.py +26 -0
- deriva_ml/execution/execution.py +50 -28
- deriva_ml/execution/execution_configuration.py +26 -31
- deriva_ml/execution/workflow.py +8 -0
- deriva_ml/model/catalog.py +119 -2
- deriva_ml/model/database.py +457 -83
- deriva_ml/protocols/dataset.py +19 -0
- deriva_ml/run_notebook.py +55 -50
- deriva_ml/schema/annotations.py +7 -5
- deriva_ml/test.py +94 -0
- {deriva_ml-1.14.47.dist-info → deriva_ml-1.17.0.dist-info}/METADATA +10 -7
- deriva_ml-1.17.0.dist-info/RECORD +45 -0
- deriva_ml/model/sql_mapper.py +0 -44
- deriva_ml-1.14.47.dist-info/RECORD +0 -42
- {deriva_ml-1.14.47.dist-info → deriva_ml-1.17.0.dist-info}/WHEEL +0 -0
- {deriva_ml-1.14.47.dist-info → deriva_ml-1.17.0.dist-info}/entry_points.txt +0 -0
- {deriva_ml-1.14.47.dist-info → deriva_ml-1.17.0.dist-info}/licenses/LICENSE +0 -0
- {deriva_ml-1.14.47.dist-info → deriva_ml-1.17.0.dist-info}/top_level.txt +0 -0
deriva_ml/protocols/dataset.py
ADDED

@@ -0,0 +1,19 @@
+"""A module defining the DatasetLike protocol for dataset operations.
+
+This module contains the definition of the DatasetLike protocol, which
+provides an interface for datasets to implement specific functionality related
+to listing dataset children. It is particularly useful for ensuring type
+compatibility for objects that mimic datasets in their behavior.
+
+Classes:
+    DatasetLike: A protocol that specifies methods required for dataset-like
+        objects.
+"""
+from typing import Protocol, runtime_checkable
+
+from deriva_ml.core.definitions import RID
+
+
+@runtime_checkable
+class DatasetLike(Protocol):
+    def list_dataset_children(self, dataset_rid: RID, recurse: bool = False) -> list[RID]: ...
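Since DatasetLike is declared with runtime_checkable, isinstance checks pass for any object that structurally provides list_dataset_children (the check verifies only that the method exists, not its signature). A minimal sketch of how such a protocol is typically consumed; the LocalDatasetStub class and count_children helper below are hypothetical and assume the module is importable as deriva_ml.protocols.dataset:

from deriva_ml.protocols.dataset import DatasetLike

class LocalDatasetStub:
    # Hypothetical stand-in that behaves like a dataset without touching a catalog.
    def list_dataset_children(self, dataset_rid, recurse=False):
        return []  # pretend there are no nested datasets

def count_children(ds: DatasetLike, rid) -> int:
    # Accepts anything dataset-like, thanks to structural typing.
    return len(ds.list_dataset_children(rid, recurse=True))

assert isinstance(LocalDatasetStub(), DatasetLike)  # True: the required method is present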
deriva_ml/run_notebook.py
CHANGED

@@ -3,14 +3,13 @@
 import json
 import os
 import tempfile
-from datetime import datetime
 from pathlib import Path
 
 import nbformat
 import papermill as pm
-import regex as re
 import yaml
 from deriva.core import BaseCLI
+from jupyter_client.kernelspec import KernelSpecManager
 from nbconvert import MarkdownExporter
 
 from deriva_ml import DerivaML, ExecAssetType, Execution, ExecutionConfiguration, MLAsset, Workflow
@@ -44,13 +43,6 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             help="Display logging output from notebook.",
         )
 
-        self.parser.add_argument(
-            "--catalog",
-            metavar="<1>",
-            default=1,
-            help="Catalog number. Default 1",
-        )
-
         self.parser.add_argument(
             "--parameter",
             "-p",
@@ -61,7 +53,13 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             help="Provide a parameter name and value to inject into the notebook.",
        )
 
-        self.parser.add_argument(
+        self.parser.add_argument(
+            "--kernel",
+            "-k",
+            type=str,
+            help="Name of kernel to run..",
+            default=self._find_kernel_for_venv(),
+        )
 
     @staticmethod
     def _coerce_number(val: str):
@@ -100,26 +98,50 @@ class DerivaMLRunNotebookCLI(BaseCLI):
             print(f"Notebook file must be an ipynb file: {notebook_file.name}.")
             exit(1)
 
-        os.environ["DERIVA_HOST"] = args.host
-        os.environ["DERIVA_CATALOG"] = args.catalog
-
         # Create a workflow instance for this specific version of the script.
         # Return an existing workflow if one is found.
         notebook_parameters = pm.inspect_notebook(notebook_file)
+
         if args.inspect:
             for param, value in notebook_parameters.items():
                 print(f"{param}:{value['inferred_type_name']} (default {value['default']})")
             return
         else:
-            notebook_parameters = (
-
-
-
-
-
-
-
-
+            notebook_parameters = {k: v["default"] for k, v in notebook_parameters.items()} | parameters
+            self.run_notebook(notebook_file.resolve(), parameters, kernel=args.kernel, log=args.log_output)
+
+    @staticmethod
+    def _find_kernel_for_venv() -> str | None:
+        """
+        Return the name and spec of an existing Jupyter kernel corresponding
+        to a given Python virtual environment path.
+
+        Parameters
+        ----------
+        venv_path : str
+            Absolute or relative path to the virtual environment.
+
+        Returns
+        -------
+        dict | None
+            The kernel spec (as a dict) if found, or None if not found.
+        """
+        venv = os.environ.get("VIRTUAL_ENV")
+        if not venv:
+            return None
+        venv_path = Path(venv).resolve()
+        ksm = KernelSpecManager()
+        for name, spec in ksm.get_all_specs().items():
+            kernel_json = spec.get("spec", {})
+            argv = kernel_json.get("argv", [])
+            # check for python executable path inside argv
+            for arg in argv:
+                try:
+                    if Path(arg).resolve() == venv_path.joinpath("bin", "python").resolve():
+                        return name
+                except Exception:
+                    continue
+        return None
 
     def run_notebook(self, notebook_file: Path, parameters, kernel=None, log=False):
         url, checksum = Workflow.get_url_and_checksum(Path(notebook_file))
@@ -127,8 +149,9 @@ class DerivaMLRunNotebookCLI(BaseCLI):
         os.environ["DERIVA_ML_WORKFLOW_CHECKSUM"] = checksum
         os.environ["DERIVA_ML_NOTEBOOK_PATH"] = notebook_file.as_posix()
         with tempfile.TemporaryDirectory() as tmpdirname:
-            print(f"Running notebook {notebook_file.name} with parameters:")
             notebook_output = Path(tmpdirname) / Path(notebook_file).name
+            execution_rid_path = Path(tmpdirname) / "execution_rid.json"
+            os.environ["DERIVA_ML_SAVE_EXECUTION_RID"] = execution_rid_path.as_posix()
             pm.execute_notebook(
                 input_path=notebook_file,
                 output_path=notebook_output,
@@ -137,22 +160,19 @@ class DerivaMLRunNotebookCLI(BaseCLI):
                 log_output=log,
             )
             print(f"Notebook output saved to {notebook_output}")
-
-
-
-
-                    r"Execution RID: https://(?P<host>.*)/id/(?P<catalog_id>.*)/(?P<execution_rid>[\w-]+)",
-                    line,
-                ):
-                    hostname = m["host"]
-                    catalog_id = m["catalog_id"]
-                    execution_rid = m["execution_rid"]
-            if not execution_rid:
+            with execution_rid_path.open("r") as f:
+                execution_config = json.load(f)
+
+            if not execution_config:
                 print("Execution RID not found.")
                 exit(1)
 
+            execution_rid = execution_config["execution_rid"]
+            hostname = execution_config["hostname"]
+            catalog_id = execution_config["catalog_id"]
+            workflow_rid = execution_config["workflow_rid"]
             ml_instance = DerivaML(hostname=hostname, catalog_id=catalog_id, working_dir=tmpdirname)
-            workflow_rid = ml_instance.retrieve_rid(execution_rid)["Workflow"]
+            workflow_rid = ml_instance.retrieve_rid(execution_config["execution_rid"])["Workflow"]
 
             execution = Execution(
                 configuration=ExecutionConfiguration(workflow=workflow_rid),
@@ -183,21 +203,6 @@ class DerivaMLRunNotebookCLI(BaseCLI):
                 file_name=notebook_output_md,
                 asset_types=ExecAssetType.notebook_output,
             )
-            execution.asset_file_path(
-                asset_name=MLAsset.execution_asset,
-                file_name=notebook_output_md,
-                asset_types=ExecAssetType.notebook_output,
-            )
-            print("parameter....")
-
-            parameter_file = execution.asset_file_path(
-                asset_name=MLAsset.execution_asset,
-                file_name=f"notebook-parameters-{datetime.now().strftime('%Y%m%d-%H%M%S')}.json",
-                asset_types=ExecAssetType.input_file.value,
-            )
-
-            with Path(parameter_file).open("w") as f:
-                json.dump(parameters, f)
             execution.upload_execution_outputs()
 
             print(ml_instance.cite(execution_rid))
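The rewritten tail of run_notebook.py replaces stdout scraping (the old regex over "Execution RID: https://..." lines) with a file-based handoff: the runner points the DERIVA_ML_SAVE_EXECUTION_RID environment variable at a temp file and, after papermill finishes, reads execution_rid, hostname, catalog_id, and workflow_rid back out of it. A minimal sketch of the writer side of that contract, assuming it is the notebook-side execution code that produces the file (the helper below is illustrative, not the library's actual implementation):

import json
import os
from pathlib import Path

def save_execution_rid(execution_rid: str, hostname: str, catalog_id: str, workflow_rid: str) -> None:
    # Hypothetical helper mirroring what the notebook side is expected to write.
    save_path = os.environ.get("DERIVA_ML_SAVE_EXECUTION_RID")
    if not save_path:
        return  # not launched via the notebook runner; nothing to hand back
    payload = {
        "execution_rid": execution_rid,  # keys match what run_notebook reads back
        "hostname": hostname,
        "catalog_id": catalog_id,
        "workflow_rid": workflow_rid,
    }
    Path(save_path).write_text(json.dumps(payload))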
deriva_ml/schema/annotations.py
CHANGED

@@ -1,4 +1,5 @@
 import argparse
+import sys
 
 from deriva.core.ermrest_model import Model, Table
 from deriva.core.utils.core_utils import tag as deriva_tags
@@ -183,10 +184,10 @@ def catalog_annotation(model: DerivaModel) -> None:
 
 def asset_annotation(asset_table: Table):
     """Generate annotations for an asset table.
-
+
     Args:
         asset_table: The Table object representing the asset table.
-
+
     Returns:
         A dictionary containing the annotations for the asset table.
     """
@@ -316,7 +317,8 @@ def generate_annotation(model: Model, schema: str) -> dict:
             },
             {
                 "source": [
-                    {"inbound": [schema, "
+                    {"inbound": [schema, "Execution_Metadata_Execution_Execution_fkey"]},
+                    {"outbound": [schema, "Execution_Metadata_Execution_Execution_Metadata_fkey"]},
                     "RID",
                 ],
                 "markdown_name": "Execution Metadata",
@@ -453,9 +455,9 @@
 
 def main():
     """Main entry point for the annotations CLI.
-
+
     Applies annotations to the ML schema based on command line arguments.
-
+
     Returns:
         None. Executes the CLI.
     """
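The visible-foreign-keys fix above completes a source path that routes an Execution record's "Execution Metadata" listing through the association table: an inbound hop from Execution into Execution_Metadata_Execution, then an outbound hop to the Execution_Metadata row, ending at its RID. That reading of the two constraint names is inferred from DERIVA naming conventions; written out as a standalone value (with <schema> as a placeholder for the schema variable), the corrected pseudo-column is roughly:

execution_metadata_column = {
    "source": [
        # inbound: follow the fkey pointing from the association table at Execution
        {"inbound": ["<schema>", "Execution_Metadata_Execution_Execution_fkey"]},
        # outbound: follow the association table's fkey to Execution_Metadata
        {"outbound": ["<schema>", "Execution_Metadata_Execution_Execution_Metadata_fkey"]},
        "RID",
    ],
    "markdown_name": "Execution Metadata",
}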
deriva_ml/test.py
ADDED

@@ -0,0 +1,94 @@
+from typing import Any, Type
+from deriva_ml import RID
+from sqlalchemy import UniqueConstraint, inspect
+from collections import defaultdict
+from graphlib import CycleError, TopologicalSorter
+
+def _prepare_wide_table(self, dataset, dataset_rid: RID, include_tables: list[str]) -> tuple:
+    """
+    Generates details of a wide table from the model
+
+    Args:
+        include_tables (list[str] | None): List of table names to include in the denormalized dataset. If None,
+            all tables from the dataset will be included.
+
+    Returns:
+        str: SQL query string that represents the process of denormalization.
+    """
+
+    # Skip over tables that we don't want to include in the denormalized dataset.
+    # Also, strip off the Dataset/Dataset_X part of the path so we don't include dataset columns in the denormalized
+    # table.
+    include_tables = set(include_tables)
+    for t in include_tables:
+        # Check to make sure the table is in the catalog.
+        _ = self.name_to_table(t)
+
+    table_paths = [
+        path
+        for path in self._schema_to_paths()
+        if path[-1].name in include_tables and include_tables.intersection({p.name for p in path})
+    ]
+    paths_by_element = defaultdict(list)
+    for p in table_paths:
+        paths_by_element[p[2].name].append(p)
+
+    # Get the names of all of the tables that can be dataset elements.
+    dataset_element_tables = {e.name for e in self.list_dataset_element_types() if e.schema.name == self.domain_schema}
+
+    skip_columns = {"RCT", "RMT", "RCB", "RMB"}
+    join_conditions = {}
+    join_tables = {}
+    for element_table, paths in paths_by_element.items():
+        graph = {}
+        for path in paths:
+            for left, right in zip(path[0:], path[1:]):
+                graph.setdefault(left.name, set()).add(right.name)
+
+        # New lets remove any cycles that we may have in the graph.
+        # We will use a topological sort to find the order in which we need to join the tables.
+        # If we find a cycle, we will remove the table from the graph and splice in an additional ON clause.
+        # We will then repeat the process until there are no cycles.
+        graph_has_cycles = True
+        element_join_tables = []
+        element_join_conditions = {}
+        while graph_has_cycles:
+            try:
+                ts = TopologicalSorter(graph)
+                element_join_tables = list(reversed(list(ts.static_order())))
+                graph_has_cycles = False
+            except CycleError as e:
+                cycle_nodes = e.args[1]
+                if len(cycle_nodes) > 3:
+                    raise DerivaMLException(f"Unexpected cycle found when normalizing dataset {cycle_nodes}")
+                # Remove cycle from graph and splice in additional ON constraint.
+                graph[cycle_nodes[1]].remove(cycle_nodes[0])
+
+        # The Dataset_Version table is a special case as it points to dataset and dataset to version.
+        if "Dataset_Version" in join_tables:
+            element_join_tables.remove("Dataset_Version")
+
+        for path in paths:
+            for left, right in zip(path[0:], path[1:]):
+                if right.name == "Dataset_Version":
+                    # The Dataset_Version table is a special case as it points to dataset and dataset to version.
+                    continue
+                if element_join_tables.index(right.name) < element_join_tables.index(left.name):
+                    continue
+                table_relationship = self._table_relationship(left, right)
+                element_join_conditions.setdefault(right.name, set()).add((table_relationship[0], table_relationship[1]))
+        join_tables[element_table] = element_join_tables
+        join_conditions[element_table] = element_join_conditions
+    # Get the list of columns that will appear in the final denormalized dataset.
+    denormalized_columns = [
+        (table_name, c.name)
+        for table_name in join_tables
+        if not self.is_association(table_name)  # Don't include association columns in the denormalized view.'
+        for c in self.name_to_table(table_name).columns
+        if (not include_tables or table_name in include_tables) and (c.name not in skip_columns)
+    ]
+
+    # List of dataset ids to include in the denormalized view.
+    dataset_rids = [dataset_rid] + dataset.list_dataset_children(recurse=True)
+    return join_tables, join_conditions, denormalized_columns, dataset_rids, dataset_element_tables
+

{deriva_ml-1.14.47.dist-info → deriva_ml-1.17.0.dist-info}/METADATA
CHANGED

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: deriva-ml
-Version: 1.
+Version: 1.17.0
 Summary: Utilities to simplify use of Dervia and Pandas to create reproducable ML pipelines
 Author-email: ISRD <isrd-dev@isi.edu>
-Requires-Python: >=3.
+Requires-Python: >=3.11
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: bump-my-version
@@ -12,15 +12,18 @@ Requires-Dist: deriva~=1.7.10
 Requires-Dist: deepdiff
 Requires-Dist: nbconvert
 Requires-Dist: pandas
-Requires-Dist:
+Requires-Dist: pip-system-certs
 Requires-Dist: pydantic>=2.11
-Requires-Dist: semver>3.0.0
-Requires-Dist: setuptools>=64
-Requires-Dist: setuptools-scm>=8.0
-Requires-Dist: nbstripout
 Requires-Dist: papermill
 Requires-Dist: pandas-stubs==2.2.3.250527
 Requires-Dist: pyyaml
+Requires-Dist: regex~=2024.7.24
+Requires-Dist: semver>3.0.0
+Requires-Dist: setuptools>=80
+Requires-Dist: setuptools-scm>=8.0
+Requires-Dist: nbstripout
+Requires-Dist: hydra_zen
+Requires-Dist: SQLAlchemy
 Dynamic: license-file
 
 # DerivaML

deriva_ml-1.17.0.dist-info/RECORD
ADDED

@@ -0,0 +1,45 @@
+deriva_ml/.DS_Store,sha256=gb-f5IXVed_gS5Be1Z6WxCYjrI_r5SdblvfFpIOY4ro,8196
+deriva_ml/__init__.py,sha256=YCG7P4PUtO_b-aIIYb4KhKHcfnb8Wz_YeAL-c0HiQlA,1775
+deriva_ml/bump_version.py,sha256=eN2G5G_OeiuFxhOdjjwfxD8Rmv6dFvzIm0y_1x4Mif4,4020
+deriva_ml/demo_catalog.py,sha256=FfXPlDfzy29K9g2Fr_KmYyRhmxP2eSaqm8_Xcji8fUM,15352
+deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
+deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
+deriva_ml/run_notebook.py,sha256=_pds1q3WcfWqhCBqKeznbwSv5n7OND8FkL6JQ2Jkfmc,8093
+deriva_ml/test.py,sha256=BqmQXR9IyQP9h8pWttk0dzyJod2CwcfYbSUZS-Q5r4k,4460
+deriva_ml/core/__init__.py,sha256=Ko8GsWc7K_eDFW0-GaNS6gOWYP8cWHWir-ChSQaHntE,856
+deriva_ml/core/base.py,sha256=KzZW310J0YmvCUhuCWxd42LNCM_JSzR__ObtT7zgcsU,62525
+deriva_ml/core/config.py,sha256=dF4rOLFmbk1DEkQimqbiH4pC519nRZWpwKItARNMiZ4,2244
+deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
+deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
+deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
+deriva_ml/core/ermrest.py,sha256=N0IJ3TE87jElaBChEIo5AFDTr0SIrb6F90yiimRfPr4,10182
+deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,934
+deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
+deriva_ml/dataset/__init__.py,sha256=wTCQaWGfRYEiUoOOxerKSpkbl1T5YFhoCyemlxGTk8k,283
+deriva_ml/dataset/aux_classes.py,sha256=ojqe7gyK4KQVz_xfIillXS_HJ1PMsyr47pb2tFOXO_c,7855
+deriva_ml/dataset/dataset.py,sha256=d860WuCL0-Pz6TyRpGVzhpPWDMco01-I5LT4dZjYxsQ,64728
+deriva_ml/dataset/dataset_bag.py,sha256=ori3BuYVqfeHkVCjNSKuZh7oMdC6uufsszicpTPODiw,19944
+deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
+deriva_ml/dataset/upload.py,sha256=n1aXSbOx1hghCDxuF8yf03jZmOLMueXL-rSnQMrfHq0,16535
+deriva_ml/execution/__init__.py,sha256=5kKpPwQbxhmRn7Npz7DpavuCxYwCQaDdl4-6z62hbds,705
+deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
+deriva_ml/execution/execution.py,sha256=hNBfYnqXK4PmNS2wxbJ5oYzjDszjaiGHo8d3uxmIgPk,46210
+deriva_ml/execution/execution_configuration.py,sha256=RT0x9n0uyJgEsrLCUTu16nAUJN7X-XLDvfezln0PTDQ,5775
+deriva_ml/execution/workflow.py,sha256=rTlspICp2Q6prUwPCeukjhO64rbcJivcFs4zH60B16U,13906
+deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+deriva_ml/model/catalog.py,sha256=O6_Ll4Uxg6DyxoBXT9P9CPTt9jx1guVTeX1L3KW1A5c,19645
+deriva_ml/model/database.py,sha256=BG5FSisl9tWTBnf5k9dNnijOIDyCUDeRhN_inkmIqTw,31132
+deriva_ml/protocols/dataset.py,sha256=1TyaT--89Elcs-nCvVyJxUj4cDaLztZOuSOzzj1cBMk,699
+deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
+deriva_ml/schema/annotations.py,sha256=CMcRqYUlyW8iLCYp6sYJsncaRNtp4kFKoxcg-i-t-50,18302
+deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
+deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
+deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
+deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
+deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
+deriva_ml-1.17.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deriva_ml-1.17.0.dist-info/METADATA,sha256=gvz8ApFj8xylH1r4Nr-X_QiHChj6wRNJE7pLzI2sB8E,1272
+deriva_ml-1.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deriva_ml-1.17.0.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
+deriva_ml-1.17.0.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
+deriva_ml-1.17.0.dist-info/RECORD,,
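Returning to the _prepare_wide_table function added in deriva_ml/test.py above: it derives a join order with graphlib and, whenever a CycleError is raised, deletes one edge of the reported cycle and retries until a topological order exists. A self-contained sketch of that pattern on a toy join graph (the table names here are made up):

from graphlib import CycleError, TopologicalSorter

# Toy join graph: each key maps to the tables it depends on.
graph = {
    "Dataset": {"Image", "Dataset_Version"},
    "Image": {"Annotation"},
    "Annotation": set(),
    "Dataset_Version": {"Dataset"},  # back-edge that creates a cycle
}

while True:
    try:
        order = list(reversed(list(TopologicalSorter(graph).static_order())))
        break
    except CycleError as e:
        cycle = e.args[1]  # e.g. ['Dataset', 'Dataset_Version', 'Dataset']
        graph[cycle[1]].discard(cycle[0])  # drop one edge of the cycle and retry

print(order)  # a usable join order once the cycle is broken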
deriva_ml/model/sql_mapper.py
DELETED

@@ -1,44 +0,0 @@
-from datetime import datetime, timezone
-from typing import TYPE_CHECKING, Any, Sequence
-
-if TYPE_CHECKING:
-    from deriva_ml.model.database import DatabaseModel
-
-try:
-    from icecream import ic
-except ImportError:  # Graceful fallback if IceCream isn't installed.
-    ic = lambda *a: None if not a else (a[0] if len(a) == 1 else a)  # noqa
-
-
-class SQLMapper:
-    def __init__(self, database: "DatabaseModel", table: str) -> None:
-        table_name = database.normalize_table_name(table)
-        schema, table = table_name.split(":")
-
-        with database.dbase as dbase:
-            self.col_names = [c[1] for c in dbase.execute(f'PRAGMA table_info("{table_name}")').fetchall()]
-
-        self.boolean_columns = [
-            self.col_names.index(c.name)
-            for c in database.model.schemas[schema].tables[table].columns
-            if c.type.typename == "boolean"
-        ]
-        self.time_columns = [
-            self.col_names.index(c.name)
-            for c in database.model.schemas[schema].tables[table].columns
-            if c.type.typename in ["ermrest_rct", "ermrest_rmt"]
-        ]
-
-    def _map_value(self, idx: int, v: Any) -> Any:
-        """
-        Return a new value based on `data` where, for each index in `idxs`,
-        """
-        tf_map = {"t": True, "f": False}
-        if idx in self.boolean_columns:
-            return tf_map.get(v, v)
-        if idx in self.time_columns:
-            return datetime.strptime(v, "%Y-%m-%d %H:%M:%S.%f+00").replace(tzinfo=timezone.utc).isoformat()
-        return v
-
-    def transform_tuple(self, data: Sequence[Any]) -> Any:
-        return dict(zip(self.col_names, tuple(self._map_value(i, v) for i, v in enumerate(data))))

deriva_ml-1.14.47.dist-info/RECORD
DELETED

@@ -1,42 +0,0 @@
-deriva_ml/__init__.py,sha256=_aMdxGG4mRTcXodLZLNpXqH8v5uqMbqFUryE9KqNSB8,1158
-deriva_ml/bump_version.py,sha256=eN2G5G_OeiuFxhOdjjwfxD8Rmv6dFvzIm0y_1x4Mif4,4020
-deriva_ml/demo_catalog.py,sha256=JjPAIac_hKPh5krEhGJydjXquRnivi7kQoR8W4Khp-s,14928
-deriva_ml/feature.py,sha256=6-aphkxdKjWa9oPSGFWxHcwAc_8hmWj-7I4M178YG5Y,8470
-deriva_ml/install_kernel.py,sha256=b62XY0SLViYO_Zye5r1Pl9qhYZyu_fk4KAO8NS1pxgM,2165
-deriva_ml/run_notebook.py,sha256=QRO_CK9Q9qt_n-c0rxGdIRyTHjGOuZxt-wj0WQTnaAM,8171
-deriva_ml/core/__init__.py,sha256=V_i90pc5PB1F4UdOO6DZWzpEFaZDTaPRU-EzKXQ19eI,787
-deriva_ml/core/base.py,sha256=LI_ZLpVJwWx4DW2Wo7luALQauQ3xhBxFYHSKDAfNsag,61649
-deriva_ml/core/constants.py,sha256=6wBJ8qMxe-dbCjRGrjUIX-RK0mTWrLDTeUpaVbLFoM8,888
-deriva_ml/core/definitions.py,sha256=uq_8uYFBVBVHS691Ri2kdQsN37z0GNYTaZskJIb_ocM,1385
-deriva_ml/core/enums.py,sha256=sSN4B4OynbB-AXwxRszoFr-KWIWIAfhVa06EzAEHwVc,7194
-deriva_ml/core/ermrest.py,sha256=N0IJ3TE87jElaBChEIo5AFDTr0SIrb6F90yiimRfPr4,10182
-deriva_ml/core/exceptions.py,sha256=4MZNPOyN-UMaGeY9sqJDVwh_iOmz1ntp4usSyCNqVMg,934
-deriva_ml/core/filespec.py,sha256=BQAAcRXfXq1lDcsKlokLOOXCBtEZpPgXxrFOIZYAgLg,4229
-deriva_ml/dataset/__init__.py,sha256=ukl2laJqa9J2AVqb4zlpIYc-3RaAlfRR33NMIQaoNrQ,104
-deriva_ml/dataset/aux_classes.py,sha256=9mZAln7_rrzaRbKhKA6dJOp3xeD6dHOC9NXOtJKROo4,6933
-deriva_ml/dataset/dataset.py,sha256=B9QBFgcW1fCEseBV3FcgckPSrJyixEqeoG80mp__CfI,64472
-deriva_ml/dataset/dataset_bag.py,sha256=mPIZRX5aTbVRcJbCFtdkmlnexquF8NE-onbVK_8IxVk,14224
-deriva_ml/dataset/history.py,sha256=FK5AYYz11p4E4FWMVg4r7UPWOD4eobrq3b3xMjWF59g,3197
-deriva_ml/dataset/upload.py,sha256=i_7KLfRSd2-THqZ1aG2OFAFGoyb8dJBCZZ5t1ftrtMQ,16429
-deriva_ml/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deriva_ml/execution/environment.py,sha256=B7nywqxFTRUWgyu8n7rFoKcVC9on422kjeFG2FPQfvg,9302
-deriva_ml/execution/execution.py,sha256=NJjjrxGsedv0zoe-T-LxfO_5UG83KOHaxU3SY5EJ0QQ,44928
-deriva_ml/execution/execution_configuration.py,sha256=Rw4VWkBCZN9yatvSKdTqEWTfu470lpcVKfHFR0uN0jI,6248
-deriva_ml/execution/workflow.py,sha256=7CwPrgs3FKQHiEVus0PpK9w5hVKLKZnCrlu_nT8GFe8,13604
-deriva_ml/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deriva_ml/model/catalog.py,sha256=dzTBcRlqgEVkPY32AUax_iu75RgFiT4Pu5au7rmrv8k,14068
-deriva_ml/model/database.py,sha256=SBkYFf0qwbGmvL0Xtn_n5DCz4roGfrhuYrM8G69Cy9Y,14837
-deriva_ml/model/sql_mapper.py,sha256=_0QsJEVSgSPtxrWKSgjfPZCQ1aMVcjR_Tk2OxLhWEvY,1696
-deriva_ml/schema/__init__.py,sha256=yV-MfzCF3FA4OOz7mZwMM2q6-x1vgOJ057kUvikFF6E,130
-deriva_ml/schema/annotations.py,sha256=TuQ3vWFnK0160fRmtvsCkHx9qAcRa63MSyERB4x5a98,18197
-deriva_ml/schema/check_schema.py,sha256=6dadLYHPqRex6AYVClmsESI8WhC7-rb-XnGf2G298xw,3609
-deriva_ml/schema/create_schema.py,sha256=9qK9_8SRQT-DwcEwTGSkhi3j2NaoH5EVgthvV2kO-gg,13042
-deriva_ml/schema/deriva-ml-reference.json,sha256=AEOMIgwKO3dNMMWHb0lxaXyamvfAEbUPh8qw0aAtsUQ,242460
-deriva_ml/schema/policy.json,sha256=5ykB8nnZFl-oCHzlAwppCFKJHWJFIkYognUMVEanfY8,1826
-deriva_ml/schema/table_comments_utils.py,sha256=4flCqnZAaqg_uSZ9I18pNUWAZoLfmMCXbmI5uERY5vM,2007
-deriva_ml-1.14.47.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deriva_ml-1.14.47.dist-info/METADATA,sha256=7kRaVpheUZqLTe82Q9KIAovS2gkiBE7KItSk67nQU9U,1190
-deriva_ml-1.14.47.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deriva_ml-1.14.47.dist-info/entry_points.txt,sha256=XsHSbfp7S1cKMjHoPUdFIaFcp9lHXHS6CV1zb_MEXkg,463
-deriva_ml-1.14.47.dist-info/top_level.txt,sha256=I1Q1dkH96cRghdsFRVqwpa2M7IqJpR2QPUNNc5-Bnpw,10
-deriva_ml-1.14.47.dist-info/RECORD,,
File without changes
File without changes
File without changes
File without changes