dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
dsgrid/utils/files.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
"""File utility functions"""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
import shutil
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import json5
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def compute_file_hash(filename) -> str:
    """Return a hash of the contents of a file.

    Parameters
    ----------
    filename : str
        Path to the file to hash.

    Returns
    -------
    str
        Hash in the form of a hex number converted to a string.
    """
    contents = Path(filename).read_bytes()
    return compute_hash(contents)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def compute_hash(text: bytes) -> str:
    """Return the SHA-256 hex digest of *text*."""
    # One-shot construction is equivalent to creating the object and
    # calling update() once.
    return hashlib.sha256(text).hexdigest()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def delete_if_exists(path: Path | str) -> None:
|
|
41
|
+
"""Delete a file or directory if it exists."""
|
|
42
|
+
path = Path(path) if isinstance(path, str) else path
|
|
43
|
+
if path.exists():
|
|
44
|
+
if path.is_dir():
|
|
45
|
+
shutil.rmtree(path)
|
|
46
|
+
else:
|
|
47
|
+
path.unlink()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def dump_data(data, filename, **kwargs) -> None:
    """Dump data to the filename.
    Supports JSON, JSON5, or a custom serializer module passed as ``mod``.

    Parameters
    ----------
    data : dict
        data to dump
    filename : str
        file to create or overwrite
    """
    mod = _get_module_from_extension(filename, **kwargs)
    # Do not forward the module-selection kwarg to the serializer; passing
    # an unexpected "mod" keyword to mod.dump() would raise TypeError.
    dump_kwargs = {k: v for k, v in kwargs.items() if k != "mod"}
    with open(filename, "w") as f_out:
        mod.dump(data, f_out, **dump_kwargs)

    logger.debug("Dumped data to %s", filename)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_data(filename, **kwargs) -> dict[str, Any]:
    """Load and return the contents of the file.
    Supports JSON, JSON5, or a custom serializer module passed as ``mod``.

    Parameters
    ----------
    filename : str

    Returns
    -------
    dict
    """
    mod = _get_module_from_extension(filename, **kwargs)
    with open(filename) as f_in:
        try:
            contents = mod.load(f_in)
        except Exception:
            # Log which file failed before propagating the parse error.
            logger.exception("Failed to load data from %s", filename)
            raise

    logger.debug("Loaded data from %s", filename)
    return contents
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def dump_json_file(data, filename, indent=None) -> None:
    """Write *data* to a JSON or JSON5 file.

    Parameters
    ----------
    data : dict
    filename : str
    indent : int | None
        Indentation forwarded to the serializer; None means compact output.
    """
    dump_data(data, filename, indent=indent)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def load_json_file(filename: Path | str) -> dict[str, Any]:
    """Read and return the contents of a JSON or JSON5 file."""
    return load_data(filename)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def dump_line_delimited_json(data, filename, mode="w"):
    """Dump a list of objects to the file as line-delimited JSON.

    Parameters
    ----------
    data : list
    filename : str
    mode : str
        Mode to use for opening the file, defaults to "w"
    """
    # Write plain UTF-8. The previous "utf-8-sig" codec emits a BOM every
    # time the file is opened, so appending (mode="a") inserted a BOM
    # mid-file that load_line_delimited_json could not strip, breaking
    # json.loads on the first appended line. Readers using "utf-8-sig"
    # handle BOM-less files transparently, so this stays compatible.
    with open(filename, mode, encoding="utf-8") as f_out:
        for obj in data:
            f_out.write(json.dumps(obj))
            f_out.write("\n")

    logger.debug("Dumped data to %s", filename)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def load_line_delimited_json(filename):
    """Load data from the file that is stored as line-delimited JSON.

    Blank lines are skipped; the first malformed line raises after logging
    its line number.

    Parameters
    ----------
    filename : str

    Returns
    -------
    list
    """
    records = []
    # utf-8-sig strips a leading BOM if one is present.
    with open(filename, encoding="utf-8-sig") as f_in:
        for line_num, raw_line in enumerate(f_in):
            stripped = raw_line.strip()
            if not stripped:
                continue
            try:
                records.append(json.loads(stripped))
            except Exception:
                logger.exception("Failed to decode line number %s in %s", line_num, filename)
                raise

    logger.debug("Loaded data from %s", filename)
    return records
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@contextmanager
def in_other_dir(path: Path):
    """Temporarily change the working directory to *path* while user code runs.

    Parameters
    ----------
    path : Path
    """
    saved_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Always restore the original directory, even if the body raised.
        os.chdir(saved_cwd)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _get_module_from_extension(filename, **kwargs):
    """Return the serializer module to use for *filename*.

    Selects json/json5 by file extension; callers may supply any module
    exposing ``load``/``dump`` via the ``mod`` keyword for other extensions.

    Raises
    ------
    NotImplementedError
        If the extension is unrecognized and no ``mod`` override was given.
    """
    ext = os.path.splitext(filename)[1].lower()
    if ext == ".json":
        mod = json
    elif ext == ".json5":
        mod = json5
    elif "mod" in kwargs:
        mod = kwargs["mod"]
    else:
        # Report the offending file; the original message was an f-string
        # with no placeholder that always printed "(unknown)".
        msg = f"Unsupported extension: {ext} for file {filename}"
        raise NotImplementedError(msg)

    return mod
|
dsgrid/utils/filters.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from dsgrid.exceptions import DSGInvalidParameter
|
|
3
|
+
|
|
4
|
+
ACCEPTED_OPS = ["==", "!=", "contains", "not contains"]


def transform_and_validate_filters(filter_expressions):
    """
    Validate whether an operation exists, return transformed/formatted filters

    Parameters
    ------------
    filter_expressions : set(str)
        - each expression takes the format 'field operation value'
        - 'field' and 'value' are case-insensitive and can accept spaces since expression is split by 'operation'

    Returns
    --------
    transformed_filters : list
        list of validated and transformed filtering fields

    Raises
    ------
    DSGInvalidParameter
        If no accepted operation (or an ambiguous combination) is found, or
        if the expression does not split into exactly field and value.
    """
    transformed_filters = []
    for expr_str in filter_expressions:
        # Find which accepted operation(s) the expression contains. The last
        # match wins so that "not contains" takes precedence over the
        # "contains" substring it embeds. Plain integer counting replaces
        # the previous numpy boolean-sum, which needed no array math.
        op = None
        num_matches = 0
        for candidate in ACCEPTED_OPS:
            if candidate in expr_str:
                op = candidate
                num_matches += 1

        if num_matches < 1:
            msg = f"invalid operation detected, valid ops: {ACCEPTED_OPS}"
            raise DSGInvalidParameter(msg)
        elif num_matches > 2:
            # Two matches are tolerated because "contains" always co-matches
            # inside "not contains"; three or more is genuinely ambiguous.
            msg = f"too many operations detected, choose one of the valid ops: {ACCEPTED_OPS}"
            raise DSGInvalidParameter(msg)

        fields = [x.strip() for x in expr_str.split(op) if x != ""]

        if len(fields) < 2:
            msg = (
                f"filter expression: '{expr_str}' contains too few arguments, must be in the format 'field operation value' "
                "(ex: 'Submitter == username')"
            )
            raise DSGInvalidParameter(msg)
        elif len(fields) > 2:
            msg = (
                f"filter expression: '{expr_str}' contains too many arguments, must be in the format 'field operation value', "
                "(ex: 'Submitter == username')"
            )
            raise DSGInvalidParameter(msg)

        field, value = fields
        # Lowercase both sides so later comparisons are case-insensitive.
        transformed_filters.append([field.lower(), op, value.lower()])

    return transformed_filters
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def matches_filters(row, field_to_index, transformed_filters):
    """
    Return True if the row satisfies every filter in transformed_filters.

    Parameters
    ------------
    row : list
        row in `list` registry table (manager.show())
    field_to_index : dict
        key = column names, val = column index, in registry table (or manager.show())
    transformed_filters : list
        transformed/formatted fields for filtering rows

    Returns
    --------
    bool
        True for rows matching all filters

    Raises
    ------
    DSGInvalidParameter
        If a filter references a column name that does not exist.
    """
    # Lowercase the column names once so field lookups are case-insensitive.
    lowered_columns = {name.lower(): idx for name, idx in field_to_index.items()}

    for field, op, value in transformed_filters:
        if field not in lowered_columns:
            msg = f"field='{field}' is not a valid column name, valid fields: {list(field_to_index.keys())}"
            raise DSGInvalidParameter(msg)
        # Lowercase the cell value to match the lowercased filter value.
        cell = row[lowered_columns[field]].lower()
        if not matches_filter(val=cell, op=op, required_value=value):
            return False
    return True
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def matches_filter(val, op, required_value):
|
|
98
|
+
"""
|
|
99
|
+
check if table content matches filtered value
|
|
100
|
+
|
|
101
|
+
Parameters
|
|
102
|
+
------------
|
|
103
|
+
val : str
|
|
104
|
+
value from registry table to be compared
|
|
105
|
+
op : str
|
|
106
|
+
filtering operation to be performed
|
|
107
|
+
required_value : str
|
|
108
|
+
value to match from filter_expression
|
|
109
|
+
|
|
110
|
+
Returns
|
|
111
|
+
--------
|
|
112
|
+
bool
|
|
113
|
+
return TRUE if vtable content matches filtered value, FALSE otherwise
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
if op == "==":
|
|
117
|
+
return val == required_value
|
|
118
|
+
elif op == "!=":
|
|
119
|
+
return val != required_value
|
|
120
|
+
elif op == "contains":
|
|
121
|
+
return required_value in val
|
|
122
|
+
elif op == "not contains":
|
|
123
|
+
return required_value not in val
|
|
124
|
+
else:
|
|
125
|
+
assert False, op
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Contains utility functions to map to/from dimension/mapping names and IDs."""
|
|
2
|
+
|
|
3
|
+
from dsgrid.registry.dimension_registry_manager import DimensionRegistryManager
|
|
4
|
+
from dsgrid.utils.files import dump_data, load_data
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def map_dimension_names_to_ids(dimension_mgr: DimensionRegistryManager):
    """Return a dict mapping each dimension name to its dimension ID.

    Asserts that a duplicated name always refers to the same dimension ID.
    """
    name_to_id = {}
    for dim in dimension_mgr.db.dimensions:
        name = dim["name"]
        dim_id = dim["dimension_id"]
        # setdefault stores the first ID seen; any later occurrence of the
        # same name must agree with it.
        previous = name_to_id.setdefault(name, dim_id)
        assert previous == dim_id, dim
    return name_to_id
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def map_dimension_ids_to_names(dimension_mgr):
    """Return a dict mapping each dimension ID to its name.

    Asserts that no dimension ID appears more than once.
    """
    id_to_name = {}
    for dim in dimension_mgr.db.dimensions:
        dim_id = dim["dimension_id"]
        # Dimension IDs are expected to be unique registry-wide.
        assert dim_id not in id_to_name, dim
        id_to_name[dim_id] = dim["name"]
    return id_to_name
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def map_dimension_mapping_names_to_ids(dimension_mapping_mgr, dim_id_to_name):
    """Return a dict of (from_name, to_name) tuples to mapping IDs.

    Asserts that each name pair resolves to a single mapping ID.
    """
    pair_to_id = {}
    for dmap in dimension_mapping_mgr.db.dimension_mappings:
        from_name = dim_id_to_name[dmap["from_dimension"]["dimension_id"]]
        to_name = dim_id_to_name[dmap["to_dimension"]["dimension_id"]]
        mapping_id = dmap["mapping_id"]
        # The same pair of dimensions must not map to two different IDs.
        previous = pair_to_id.setdefault((from_name, to_name), mapping_id)
        assert previous == mapping_id, dmap
    return pair_to_id
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def replace_dimension_names_with_current_ids(filename, mappings):
    """Replace dimension names in a config file with registry dimension IDs.

    Rewrites *filename* in place (indent=2) only if at least one replacement
    occurred.

    Parameters
    ----------
    filename : str
        Dataset config, project config, or dimension-mappings file.
    mappings : dict
        Maps dimension name to dimension ID.
    """
    data = load_data(filename)
    assert isinstance(data, dict)

    def perform_replacements(mappings, dimensions):
        # Swap each {"name": ...} reference for a {"dimension_id": ...} one.
        changed = False
        for ref in dimensions:
            if "name" in ref:
                ref["dimension_id"] = mappings[ref.pop("name")]
                changed = True
        return changed

    changed = False
    if "dimension_references" in data:
        # This is True for a dataset config file.
        if perform_replacements(mappings, data["dimension_references"]):
            changed = True

    if "dimensions" in data and "base_dimension_references" in data["dimensions"]:
        # This is True for a project config file.
        if perform_replacements(mappings, data["dimensions"]["base_dimension_references"]):
            changed = True
        # Guard with .get: a project config may omit supplemental references,
        # which previously raised KeyError here.
        if perform_replacements(
            mappings, data["dimensions"].get("supplemental_dimension_references", [])
        ):
            changed = True

    if "mappings" in data:
        # This is True for a dimension mappings file.
        for mapping in data["mappings"]:
            if perform_replacements(
                mappings, [mapping["from_dimension"], mapping["to_dimension"]]
            ):
                changed = True

    if changed:
        dump_data(data, filename, indent=2)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def replace_dimension_mapping_names_with_current_ids(filename, mappings):
    """Replace dimension-mapping name pairs in a config file with mapping IDs.

    Rewrites *filename* in place (indent=2) only if at least one replacement
    occurred.

    Parameters
    ----------
    filename : str
        Project config or dataset-to-project mapping reference file.
    mappings : dict
        Maps (from_name, to_name) tuples to mapping IDs.
    """
    data = load_data(filename)
    assert isinstance(data, dict)

    def perform_replacements(mappings, references):
        # Swap each {"mapping_names": {"from": ..., "to": ...}} reference
        # for a {"mapping_id": ...} one.
        changed = False
        for ref in references:
            if "mapping_names" in ref:
                item = ref.pop("mapping_names")
                ref["mapping_id"] = mappings[(item["from"], item["to"])]
                changed = True
        return changed

    changed = False
    if "dimension_mappings" in data:
        # This is True for a project config file. Guard with .get: the
        # base_to_supplemental_references key may be absent, which
        # previously raised KeyError.
        refs = data["dimension_mappings"].get("base_to_supplemental_references", [])
        if perform_replacements(mappings, refs):
            changed = True

    if "references" in data:
        # This is True for a dataset-to-project dimension mapping reference file.
        if perform_replacements(mappings, data["references"]):
            changed = True

    if changed:
        dump_data(data, filename, indent=2)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
5
|
+
in the Software without restriction, including without limitation the rights
|
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
8
|
+
furnished to do so, subject to the following conditions:
|
|
9
|
+
|
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
|
11
|
+
copies or substantial portions of the Software.
|
|
12
|
+
|
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
19
|
+
SOFTWARE.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
We cloned this code from https://github.com/axiacore/py-expression-eval and kept the LICENSE.
|
|
2
|
+
|
|
3
|
+
We needed a modification that would interpret `|` as a union for Python sets and SQL
|
|
4
|
+
queries. This modification almost certainly does not belong in the main repository, and so
|
|
5
|
+
we are storing it here.
|
|
6
|
+
|
|
7
|
+
Note to future dsgrid developers: Keep this in sync with updates from the main repository.
|
|
8
|
+
The code here is based on commit 02dc9de711da50735890bef38a938ce6db9737b9.
|