dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from dsgrid.exceptions import DSGInvalidRegistryState
|
|
4
|
+
from .config_update_checker_base import ConfigUpdateCheckerBase
|
|
5
|
+
from .common import DatasetRegistryStatus, ProjectRegistryStatus
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ProjectUpdateChecker(ConfigUpdateCheckerBase):
    """Update checker for project configs.

    Validates that the currently-registered project is in a state that permits updates and,
    after an update, downgrades dataset/project statuses when the changed fields require
    datasets to be re-submitted.
    """

    # Statuses of the existing project from which an update is permitted.
    _ALLOWED_UPDATE_STATUSES = (
        ProjectRegistryStatus.INITIAL_REGISTRATION,
        ProjectRegistryStatus.IN_PROGRESS,
        ProjectRegistryStatus.COMPLETE,
    )
    # Project fields whose modification forces registered datasets back to unregistered.
    _REQUIRES_DATASET_UNREGISTRATION = (
        "dimensions",
        "dimension_mappings",
    )

    def check_preconditions(self):
        """Verify that the existing project's status allows an update.

        Raises
        ------
        DSGInvalidRegistryState
            Raised if the old model's status is not in _ALLOWED_UPDATE_STATUSES.

        """
        if self._old_model.status in self._ALLOWED_UPDATE_STATUSES:
            return

        msg = f"project status={self._old_model.status} must be one of {self._ALLOWED_UPDATE_STATUSES} in order to update"
        raise DSGInvalidRegistryState(msg)

    def handle_postconditions(self):
        """Reset statuses on the new model when dimensions or dimension mappings changed.

        Every dataset currently marked REGISTERED is set to UNREGISTERED, and a project that
        was COMPLETE is moved back to IN_PROGRESS. Nothing happens if none of the fields in
        _REQUIRES_DATASET_UNREGISTRATION were changed.
        """
        # TODO #191: detect changes to required dimensions for each dataset.
        changes = set(self._REQUIRES_DATASET_UNREGISTRATION) & set(self._changed_fields)
        if not changes:
            return

        project = self._new_model
        for dataset in project.datasets:
            if dataset.status == DatasetRegistryStatus.REGISTERED:
                dataset.status = DatasetRegistryStatus.UNREGISTERED
        logger.warning(
            "Set all datasets in %s to unregistered because of changes=%s. "
            "They must be re-submitted.",
            project.project_id,
            changes,
        )

        if project.status != ProjectRegistryStatus.COMPLETE:
            return

        # A complete project cannot stay complete once its datasets are unregistered.
        project.status = ProjectRegistryStatus.IN_PROGRESS
        logger.warning(
            "Set project status to %s because of changes=%s.",
            project.status,
            changes,
        )
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import getpass
|
|
2
|
+
import logging
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Self
|
|
5
|
+
|
|
6
|
+
from sqlalchemy import Connection
|
|
7
|
+
|
|
8
|
+
from dsgrid.exceptions import DSGInvalidParameter
|
|
9
|
+
from dsgrid.spark.functions import drop_temp_tables_and_views
|
|
10
|
+
from dsgrid.registry.common import RegistrationModel, RegistryType, VersionUpdateType
|
|
11
|
+
from dsgrid.registry.registry_interface import RegistryInterfaceBase
|
|
12
|
+
from dsgrid.utils.timing import timer_stats_collector, track_timing
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RegistrationContext:
    """Maintains state information across a multi-config registration process.

    Intended to be used as a context manager. Entering opens a connection from the registry
    database engine and inserts the registration entry. Exiting finalizes every tracked
    registry manager and then commits (no exception) or rolls back (exception) before
    closing the connection.
    """

    def __init__(
        self,
        db: RegistryInterfaceBase,
        log_message: str,
        update_type: VersionUpdateType,
        submitter: str | None,
    ):
        """Construct the context. No database connection is opened until __enter__.

        Parameters
        ----------
        db : RegistryInterfaceBase
            Registry database interface; its engine supplies the connection.
        log_message : str
            Message recorded in the registration entry.
        update_type : VersionUpdateType
            Update type recorded in the registration entry.
        submitter : str | None
            Submitter name. Falls back to the current OS user if None or empty.

        """
        self._conn: Connection | None = None
        self._db = db
        self._registration = RegistrationModel(
            timestamp=datetime.now(),
            submitter=submitter or getpass.getuser(),
            log_message=log_message,
            update_type=update_type,
        )
        self._managers: dict[RegistryType, RegistryManagerContext | None] = {
            # This order is required for cleanup in self.finalize().
            RegistryType.PROJECT: None,
            RegistryType.DATASET: None,
            RegistryType.DIMENSION_MAPPING: None,
            RegistryType.DIMENSION: None,
        }

    def __del__(self):
        # __init__ can raise before _managers is assigned (for example while building the
        # RegistrationModel). The finalizer still runs on that partially-initialized
        # instance, so do not assume the attribute exists.
        managers = getattr(self, "_managers", None)
        if not managers:
            return

        for registry_type, manager in managers.items():
            if manager is None:
                continue
            logger.warning(
                "RegistrationContext destructed with a reference to %s manager",
                registry_type.value,
            )
            if not manager.offline_mode and manager.has_lock():
                logger.error(
                    "RegistrationContext destructed with a lock on the remote registry. "
                    "Please contact the dsgrid team. Type=%s IDs=%s",
                    registry_type.value,
                    manager.ids,
                )

    def __enter__(self) -> Self:
        conn = self._db.engine.connect()
        try:
            self._registration = self._db.insert_registration(conn, self._registration)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the connection must be
            # released here or it would leak.
            conn.close()
            raise
        self._conn = conn
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        if self._conn is None:
            return
        try:
            if exc_type is None:
                self.finalize(False)
                self._conn.commit()
            else:
                # Order is important. Don't rollback the configs until dataset files are deleted.
                self.finalize(True)
                self._conn.rollback()
        finally:
            self._conn.close()

    @property
    def connection(self) -> Connection:
        """Return the active sqlalchemy connection."""
        assert self._conn is not None
        return self._conn

    @property
    def registration(self) -> RegistrationModel:
        """Return the registration entry for this context."""
        return self._registration

    def add_id(self, registry_type: RegistryType, config_id: str, manager):
        """Add a config ID that has been registered.

        Parameters
        ----------
        registry_type : RegistryType
        config_id : str
        manager : RegistryManagerBase

        Raises
        ------
        DSGInvalidParameter
            Raised if the config ID is already stored.

        """
        self.add_ids(registry_type, [config_id], manager)

    def add_ids(self, registry_type: RegistryType, config_ids: list[str], manager):
        """Add multiple config IDs that have been registered.

        The first call for a registry type creates the RegistryManagerContext that wraps
        ``manager``; later calls for the same type reuse it.

        Parameters
        ----------
        registry_type : RegistryType
        config_ids : list[str]
        manager : RegistryManagerBase

        Raises
        ------
        DSGInvalidParameter
            Raised if a config ID is already stored.

        """
        manager_context = self._managers[registry_type]
        if manager_context is None:
            manager_context = RegistryManagerContext(manager)
            self._managers[registry_type] = manager_context
            # NOTE: acquisition of remote registry locks is currently disabled.
            # manager.acquire_registry_locks(config_ids)
            # manager_context.set_locked()

        already_tracked = set(config_ids).intersection(manager_context.ids)
        if already_tracked:
            msg = f"One or more config IDs are already tracked: {registry_type} {already_tracked}"
            raise DSGInvalidParameter(msg)

        logger.debug("Added registered IDs: %s %s", registry_type, config_ids)
        manager_context.ids.extend(config_ids)

    def get_ids(self, registry_type: RegistryType):
        """Return the config IDs for registry_type that have been registered with this context.

        Parameters
        ----------
        registry_type : RegistryType

        Returns
        -------
        list[str]

        """
        manager_context = self._managers[registry_type]
        assert manager_context is not None, registry_type
        return manager_context.ids

    @track_timing(timer_stats_collector)
    def finalize(self, error_occurred: bool):
        """Perform final registration actions. If successful, sync all newly-registered configs
        and data with the remote registry. If there was an error, remove all intermediate
        registrations.

        Parameters
        ----------
        error_occurred : bool
            Forwarded to each manager's finalize_registration.

        """
        try:
            drop_temp_tables_and_views()
            # Replacing the value of an existing key does not resize the dict, so it is safe
            # to do while iterating over items().
            for registry_type, manager_context in self._managers.items():
                if manager_context is None:
                    continue
                if manager_context.ids:
                    manager_context.manager.finalize_registration(
                        self._conn, set(manager_context.ids), error_occurred
                    )
                    manager_context.ids.clear()
                # NOTE: release of remote registry locks is currently disabled.
                # manager_context.set_unlocked()
                self._managers[registry_type] = None
        except Exception:
            logger.exception(
                "An unexpected error occurred in finalize_registration. "
                "Please notify the dsgrid team because registry recovery may be required."
            )
            raise
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class RegistryManagerContext:
    """Bookkeeping for a single registry type during a registration.

    Holds the registry manager, the config IDs tracked for it, and a flag recording whether
    a lock on the remote registry is considered held.
    """

    def __init__(self, manager):
        self._ids = []
        self._has_lock = False
        self._manager = manager

    def has_lock(self) -> bool:
        """Return True if the manager has acquired a lock on the remote registry."""
        return self._has_lock

    def set_locked(self):
        """Record that a lock has been acquired on the remote registry."""
        manager_name = self._manager.__class__.__name__
        logger.debug("Locks acquired on remote registry for %s", manager_name)
        self._has_lock = True

    def set_unlocked(self):
        """Record that all locks on the remote registry have been released."""
        manager_name = self._manager.__class__.__name__
        logger.debug("Locks released on remote registry for %s", manager_name)
        self._has_lock = False

    @property
    def ids(self):
        """Return the list of config IDs being managed (the live list, not a copy)."""
        return self._ids

    @ids.setter
    def ids(self, val):
        """Replace the list of config IDs being managed."""
        self._ids = val

    @property
    def manager(self):
        """Return the wrapped RegistryManagerBase."""
        return self._manager

    @manager.setter
    def manager(self, val):
        """Replace the wrapped RegistryManagerBase."""
        self._manager = val

    @property
    def offline_mode(self):
        """Return True if the manager is in offline mode."""
        return self._manager.offline_mode
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from dsgrid.config.mapping_tables import MappingTableConfig, MappingTableModel
|
|
5
|
+
from dsgrid.config.dataset_config import DatasetConfig, DatasetConfigModel
|
|
6
|
+
from dsgrid.config.dimension_config import DimensionConfig
|
|
7
|
+
from dsgrid.config.project_config import ProjectConfig
|
|
8
|
+
from dsgrid.exceptions import DSGInvalidParameter
|
|
9
|
+
from dsgrid.registry.registration_context import RegistrationContext
|
|
10
|
+
from dsgrid.registry.common import (
|
|
11
|
+
ConfigKey,
|
|
12
|
+
RegistryType,
|
|
13
|
+
VersionUpdateType,
|
|
14
|
+
)
|
|
15
|
+
from dsgrid.registry.registry_manager import RegistryManager
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RegistryAutoUpdater:
    """Performs auto-updates on the registry."""

    def __init__(self, manager: RegistryManager) -> None:
        self._project_mgr = manager.project_manager
        self._dataset_mgr = manager.dataset_manager
        self._dimension_mgr = manager.dimension_manager
        self._dimension_mapping_mgr = manager.dimension_mapping_manager
        self._db = self._dimension_mapping_mgr.db

    def update_dependent_configs(
        self,
        config,
        original_version: str,
        update_type: VersionUpdateType,
        log_message: str,
        submitter: str | None = None,
    ):
        """Update all configs that consume this config. Recursive.
        This is an incomplete, experimental feature, and is subject to change.
        Should only be called by an admin that understands the consequences.
        Passing a dimension may trigger an update to a project and a dimension mapping.
        The change to that dimension mapping may trigger another update to the project.
        This guarantees that each config version will only be bumped once.

        It is up to the caller to ensure changes are synced to the remote registry if not in
        offline mode.

        Datasets likely need to be resubmitted to their projects.

        Parameters
        ----------
        config : ConfigBase
        original_version : str
            Original version of the config
        update_type : VersionUpdateType
        log_message : str
        submitter : str | None
            Passed through to RegistrationContext.

        """
        with RegistrationContext(
            self._project_mgr.db, log_message, update_type, submitter
        ) as context:
            return self.update_dependent_configs_with_context(config, original_version, context)

    def update_dependent_configs_with_context(
        self, config, original_version: str, context: RegistrationContext
    ):
        """Dispatch to the handler for the config's type, using an existing context.

        Raises
        ------
        NotImplementedError
            Raised if config is not a dimension, mapping table, or dataset config.

        """
        if isinstance(config, DimensionConfig):
            self._update_dimension_users(config, original_version, context)
        elif isinstance(config, MappingTableConfig):
            self._update_dimension_mapping_users(config, original_version, context)
        elif isinstance(config, DatasetConfig):
            self._update_dataset_users(config, original_version, context)
        else:
            msg = f"Updates of configs dependent on {type(config)}"
            raise NotImplementedError(msg)

    def _update_dimension_users(
        self,
        config: DimensionConfig,
        original_version: str,
        context: RegistrationContext,
    ):
        """Update the mappings, datasets, and projects that reference the old dimension version.

        Raises
        ------
        DSGInvalidParameter
            Raised if the dimension's current version equals original_version.

        """
        if config.model.version == original_version:
            msg = f"current version cannot be the same as the original: {original_version}"
            raise DSGInvalidParameter(msg)

        # Order is important because
        # - dimension mappings may have this dimension.
        # - datasets may have this dimension.
        # - projects may have this dimension as well as updated mappings and datasets.
        new_mappings = {}
        new_datasets = {}

        affected = self._db.get_containing_models(
            context.connection, config.model, version=original_version
        )
        for mapping in self._update_dimension_mappings_with_dimensions(
            affected, config, original_version
        ):
            # Key on the pre-update version: that is what projects currently reference.
            key = ConfigKey(mapping.model.mapping_id, mapping.model.version)
            new_mapping = self._dimension_mapping_mgr.update_with_context(mapping, context)
            assert key not in new_mappings
            new_mappings[key] = new_mapping
            logger.info(
                "Updated dimension mapping %s to %s as a result of dimension update",
                new_mapping.model.mapping_id,
                new_mapping.model.version,
            )
        for dataset in self._update_datasets_with_dimensions(affected, config, original_version):
            # Key on the pre-update version: that is what projects currently reference.
            key = ConfigKey(dataset.model.dataset_id, dataset.model.version)
            new_dataset = self._dataset_mgr.update_with_context(dataset, context)
            assert key not in new_datasets
            new_datasets[key] = new_dataset
            logger.info(
                "Updated dataset %s to %s as a result of dimension update",
                new_dataset.model.dataset_id,
                new_dataset.model.version,
            )

        self._update_projects(
            context,
            dimensions={ConfigKey(config.model.dimension_id, original_version): config},
            dimension_mappings=new_mappings,
            datasets=new_datasets,
        )

    def _update_dimension_mapping_users(
        self,
        config: MappingTableConfig,
        original_version: str,
        context: RegistrationContext,
    ) -> None:
        """Update the projects that reference the old version of the dimension mapping."""
        self._update_projects(
            context,
            dimension_mappings={ConfigKey(config.model.mapping_id, original_version): config},
        )

    def _update_dataset_users(
        self,
        config: DatasetConfig,
        original_version: str,
        context: RegistrationContext,
    ) -> None:
        """Update the projects that reference the old version of the dataset."""
        self._update_projects(
            context,
            datasets={ConfigKey(config.model.dataset_id, original_version): config},
        )

    def _update_dimension_mappings_with_dimensions(
        self,
        affected: dict[RegistryType, list[Any]],
        dim: DimensionConfig,
        original_version: str,
    ) -> list[MappingTableConfig]:
        """Return mapping configs whose from/to dimension reference was bumped in place.

        Each affected mapping is loaded from the mapping manager. Any side (from or to) that
        references ``dim`` at ``original_version`` is edited to point at ``dim``'s current
        version. The returned configs are edited but not yet registered.
        """
        mapping_updates: list[MappingTableConfig] = []
        for model in affected[RegistryType.DIMENSION_MAPPING]:
            assert isinstance(model, MappingTableModel)
            config = self._dimension_mapping_mgr.get_by_id(model.mapping_id, model.version)
            updated = False
            # Check each side against its own version. Both sides are checked independently
            # so that a mapping referencing the dimension on both ends is fully updated.
            for ref in (config.model.from_dimension, config.model.to_dimension):
                if ref.dimension_id == dim.model.dimension_id and ref.version == original_version:
                    ref.version = dim.model.version
                    updated = True
            if updated:
                mapping_updates.append(config)

        return mapping_updates

    def _update_datasets_with_dimensions(
        self,
        affected: dict[RegistryType, list[Any]],
        dim: DimensionConfig,
        original_version: str,
    ) -> list[DatasetConfig]:
        """Return dataset configs whose dimension references were bumped in place.

        The returned configs are edited but not yet registered.
        """
        new_datasets = []
        for model in affected[RegistryType.DATASET]:
            assert isinstance(model, DatasetConfigModel)
            config = self._dataset_mgr.get_by_id(model.dataset_id, model.version)
            updated = False
            for ref in config.model.dimension_references:
                if ref.dimension_id == dim.model.dimension_id and ref.version == original_version:
                    ref.version = dim.model.version
                    updated = True
            if updated:
                new_datasets.append(config)

        return new_datasets

    def _update_projects(
        self,
        context: RegistrationContext,
        dimensions: dict[ConfigKey, DimensionConfig] | None = None,
        dimension_mappings: dict[ConfigKey, MappingTableConfig] | None = None,
        datasets: dict[ConfigKey, DatasetConfig] | None = None,
    ) -> None:
        """Bump project references to the given configs and register each project once.

        Each dict maps ConfigKey(id, old_version) to the already-updated config. All edits
        for one project are accumulated on a single config object so that the project is
        passed to update_with_context only once.
        """
        updated_projects = {}
        if dimensions is not None:
            self._update_projects_with_new_dimensions(updated_projects, dimensions, context)
        if dimension_mappings is not None:
            self._update_projects_with_new_dimension_mappings(
                updated_projects, dimension_mappings, context
            )
        if datasets is not None:
            self._update_projects_with_new_datasets(updated_projects, datasets, context)

        for project_config in updated_projects.values():
            new_project = self._project_mgr.update_with_context(project_config, context)
            logger.info(
                "Updated project %s to %s as a result of dependent config updates.",
                new_project.model.project_id,
                new_project.model.version,
            )
            # TODO: Re-submit changed datasets to projects. dataset-to-project mappings might
            # take some work.

    def _get_project_config(
        self, updated_projects: dict[str, ProjectConfig], project_id: str
    ) -> ProjectConfig:
        """Return the pending edited config for project_id, loading it only if there is none."""
        config = updated_projects.get(project_id)
        if config is None:
            config = self._project_mgr.get_by_id(project_id)
        return config

    def _update_projects_with_new_dimensions(
        self,
        updated_projects: dict[str, ProjectConfig],
        dimensions: dict[ConfigKey, DimensionConfig],
        context: RegistrationContext,
    ) -> None:
        """Updates the latest project configurations in place if they consume the dimensions.
        Edits updated_projects as necessary.
        """
        for key, dim in dimensions.items():
            for model in self._db.get_containing_models(
                context.connection,
                dim.model,
                version=key.version,
                parent_model_type=RegistryType.PROJECT,
            )[RegistryType.PROJECT]:
                config = self._get_project_config(updated_projects, model.project_id)
                updated = False
                for ref in config.model.dimensions.base_dimension_references:
                    if ref.dimension_id == dim.model.dimension_id and ref.version == key.version:
                        ref.version = dim.model.version
                        updated = True
                        break
                for ref in config.model.dimensions.supplemental_dimension_references:
                    if ref.dimension_id == dim.model.dimension_id and ref.version == key.version:
                        ref.version = dim.model.version
                        updated = True
                        break
                if updated and config.model.project_id not in updated_projects:
                    updated_projects[config.model.project_id] = config

    def _update_projects_with_new_dimension_mappings(
        self,
        updated_projects: dict[str, ProjectConfig],
        mappings: dict[ConfigKey, MappingTableConfig],
        context: RegistrationContext,
    ) -> None:
        """Updates the latest project configurations in place if they consume the mappings.
        Edits updated_projects as necessary.
        """
        for key, mapping in mappings.items():
            for model in self._db.get_containing_models(
                context.connection,
                mapping.model,
                version=key.version,
                parent_model_type=RegistryType.PROJECT,
            )[RegistryType.PROJECT]:
                config = self._get_project_config(updated_projects, model.project_id)
                updated = False
                # Only mapping references are inspected. Dimension references must not be
                # compared against a mapping ID or assigned a mapping version.
                for ref in config.model.dimension_mappings.base_to_supplemental_references:
                    if ref.mapping_id == mapping.model.mapping_id and ref.version == key.version:
                        ref.version = mapping.model.version
                        updated = True
                        break
                if updated and config.model.project_id not in updated_projects:
                    updated_projects[config.model.project_id] = config

    def _update_projects_with_new_datasets(
        self,
        updated_projects: dict[str, ProjectConfig],
        datasets: dict[ConfigKey, DatasetConfig],
        context: RegistrationContext,
    ) -> None:
        """Updates the latest project configurations in place if they consume the datasets.
        Edits updated_projects as necessary.
        """
        for key, dataset in datasets.items():
            for model in self._db.get_containing_models(
                context.connection,
                dataset.model,
                version=key.version,
                parent_model_type=RegistryType.PROJECT,
            )[RegistryType.PROJECT]:
                config = self._get_project_config(updated_projects, model.project_id)
                updated = False
                for dataset_ in config.model.datasets:
                    if (
                        dataset_.dataset_id == dataset.model.dataset_id
                        and dataset_.version == key.version
                    ):
                        dataset_.version = dataset.model.version
                        updated = True
                        break
                if updated and config.model.project_id not in updated_projects:
                    updated_projects[config.model.project_id] = config
|