dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,48 @@
1
+ import logging
2
+
3
+ from dsgrid.exceptions import DSGInvalidRegistryState
4
+ from .config_update_checker_base import ConfigUpdateCheckerBase
5
+ from .common import DatasetRegistryStatus, ProjectRegistryStatus
6
+
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class ProjectUpdateChecker(ConfigUpdateCheckerBase):
    """Update checker for project configs.

    Validates that the existing project is in a status that permits an update and,
    once an update has been applied, downgrades dataset and project statuses when
    the changed fields require datasets to be re-submitted.
    """

    # Statuses of the existing project from which an update may proceed.
    _ALLOWED_UPDATE_STATUSES = (
        ProjectRegistryStatus.INITIAL_REGISTRATION,
        ProjectRegistryStatus.IN_PROGRESS,
        ProjectRegistryStatus.COMPLETE,
    )
    # A change to any of these fields un-registers the project's registered datasets.
    _REQUIRES_DATASET_UNREGISTRATION = (
        "dimensions",
        "dimension_mappings",
    )

    def check_preconditions(self):
        """Verify that the existing project's status allows it to be updated.

        Raises
        ------
        DSGInvalidRegistryState
            Raised if the old model's status is not in _ALLOWED_UPDATE_STATUSES.

        """
        if self._old_model.status in self._ALLOWED_UPDATE_STATUSES:
            return

        msg = f"project status={self._old_model.status} must be one of {self._ALLOWED_UPDATE_STATUSES} in order to update"
        raise DSGInvalidRegistryState(msg)

    def handle_postconditions(self):
        """Reset dataset and project statuses if the changed fields require it.

        When any field listed in _REQUIRES_DATASET_UNREGISTRATION changed, every
        registered dataset in the new model is marked unregistered and a project
        whose status is COMPLETE is moved back to IN_PROGRESS.
        """
        # TODO #191: detect changes to required dimensions for each dataset.
        changes = set(self._REQUIRES_DATASET_UNREGISTRATION).intersection(self._changed_fields)
        if not changes:
            return

        project = self._new_model
        registered = (
            item for item in project.datasets if item.status == DatasetRegistryStatus.REGISTERED
        )
        for item in registered:
            item.status = DatasetRegistryStatus.UNREGISTERED
        logger.warning(
            "Set all datasets in %s to unregistered because of changes=%s. "
            "They must be re-submitted.",
            project.project_id,
            changes,
        )

        if project.status != ProjectRegistryStatus.COMPLETE:
            return
        project.status = ProjectRegistryStatus.IN_PROGRESS
        logger.warning(
            "Set project status to %s because of changes=%s.",
            project.status,
            changes,
        )
@@ -0,0 +1,223 @@
1
+ import getpass
2
+ import logging
3
+ from datetime import datetime
4
+ from typing import Self
5
+
6
+ from sqlalchemy import Connection
7
+
8
+ from dsgrid.exceptions import DSGInvalidParameter
9
+ from dsgrid.spark.functions import drop_temp_tables_and_views
10
+ from dsgrid.registry.common import RegistrationModel, RegistryType, VersionUpdateType
11
+ from dsgrid.registry.registry_interface import RegistryInterfaceBase
12
+ from dsgrid.utils.timing import timer_stats_collector, track_timing
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class RegistrationContext:
    """Maintains state information across a multi-config registration process.

    Use as a context manager. Entering opens a database connection and inserts the
    registration entry. Exiting finalizes every tracked manager, then commits (no
    exception) or rolls back (exception) and always closes the connection.
    """

    def __init__(
        self,
        db: RegistryInterfaceBase,
        log_message: str,
        update_type: VersionUpdateType,
        submitter: str | None,
    ):
        """Create the context. No database connection is opened until __enter__.

        Parameters
        ----------
        db : RegistryInterfaceBase
            Provides the engine used to connect and inserts the registration entry.
        log_message : str
        update_type : VersionUpdateType
        submitter : str | None
            Falls back to the current user (getpass.getuser()) if None or empty.

        """
        self._conn: Connection | None = None
        self._db = db
        self._registration = RegistrationModel(
            timestamp=datetime.now(),
            submitter=submitter or getpass.getuser(),
            log_message=log_message,
            update_type=update_type,
        )
        self._managers: dict[RegistryType, RegistryManagerContext | None] = {
            # This order is required for cleanup in self.finalize().
            RegistryType.PROJECT: None,
            RegistryType.DATASET: None,
            RegistryType.DIMENSION_MAPPING: None,
            RegistryType.DIMENSION: None,
        }

    def __del__(self):
        # __init__ can raise before _managers is assigned (e.g., while building the
        # registration model); __del__ still runs on the partially-built object.
        managers = getattr(self, "_managers", None)
        if managers is None:
            return

        for registry_type in RegistryType:
            manager = managers.get(registry_type)
            if manager is not None:
                logger.warning(
                    "RegistrationContext destructed with a reference to %s manager",
                    registry_type.value,
                )
                if not manager.offline_mode and manager.has_lock():
                    logger.error(
                        "RegistrationContext destructed with a lock on the remote registry. "
                        "Please contact the dsgrid team. Type=%s IDs=%s",
                        registry_type.value,
                        manager.ids,
                    )

    def __enter__(self) -> Self:
        conn = self._db.engine.connect()
        try:
            self._registration = self._db.insert_registration(conn, self._registration)
        except BaseException:
            # __exit__ is not called when __enter__ raises, so the connection must be
            # released here or it would leak.
            conn.close()
            raise
        self._conn = conn
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        if self._conn is None:
            return
        try:
            if exc_type is None:
                self.finalize(False)
                self._conn.commit()
            else:
                # Order is important. Don't rollback the configs until dataset files are deleted.
                self.finalize(True)
                self._conn.rollback()
        finally:
            self._conn.close()

    @property
    def connection(self) -> Connection:
        """Return the active sqlalchemy connection."""
        assert self._conn is not None
        return self._conn

    @property
    def registration(self) -> RegistrationModel:
        """Return the registration entry for this context."""
        return self._registration

    def add_id(self, registry_type: RegistryType, config_id: str, manager):
        """Add a config ID that has been registered.

        Parameters
        ----------
        registry_type : RegistryType
        config_id : str
        manager : RegistryManagerBase

        Raises
        ------
        DSGInvalidParameter
            Raised if the config ID is already stored.

        """
        self.add_ids(registry_type, [config_id], manager)

    def add_ids(self, registry_type: RegistryType, config_ids: list[str], manager):
        """Add multiple config IDs that have been registered.

        The first call for a registry type creates its RegistryManagerContext from
        ``manager``; later calls reuse that context.

        Parameters
        ----------
        registry_type : RegistryType
        config_ids : list[str]
        manager : RegistryManagerBase

        Raises
        ------
        DSGInvalidParameter
            Raised if a config ID is already stored.

        """
        manager_context = self._managers[registry_type]
        if manager_context is None:
            manager_context = RegistryManagerContext(manager)
            self._managers[registry_type] = manager_context
            # NOTE: remote registry lock acquisition is currently disabled:
            # manager.acquire_registry_locks(config_ids)
            # manager_context.set_locked()

        duplicates = set(config_ids).intersection(manager_context.ids)
        if duplicates:
            msg = f"One or more config IDs are already tracked: {registry_type} {duplicates}"
            raise DSGInvalidParameter(msg)

        logger.debug("Added registered IDs: %s %s", registry_type, config_ids)
        manager_context.ids.extend(config_ids)

    def get_ids(self, registry_type: RegistryType):
        """Return the config IDs for registry_type that have been registered with this context.

        A manager must already have been added for registry_type (asserted).

        Parameters
        ----------
        registry_type : RegistryType

        Returns
        -------
        list[str]

        """
        manager_context = self._managers[registry_type]
        assert manager_context is not None, registry_type
        return manager_context.ids

    @track_timing(timer_stats_collector)
    def finalize(self, error_occurred: bool):
        """Perform final registration actions.

        Drops temporary tables and views, then calls finalize_registration on each
        tracked manager with its config IDs and the error flag. The tracked IDs are
        then cleared and the manager reference is dropped. Any exception is logged
        and re-raised.

        Parameters
        ----------
        error_occurred : bool
            Forwarded to each manager's finalize_registration.

        """
        try:
            drop_temp_tables_and_views()
            for registry_type, manager_context in self._managers.items():
                if manager_context is not None:
                    if manager_context.ids:
                        manager_context.manager.finalize_registration(
                            self._conn, set(manager_context.ids), error_occurred
                        )
                        manager_context.ids.clear()
                    # manager_context.set_unlocked()
                    # Only values are replaced, so mutating during iteration is safe.
                    self._managers[registry_type] = None
        except Exception:
            logger.exception(
                "An unexpected error occurred in finalize_registration. "
                "Please notify the dsgrid team because registry recovery may be required."
            )
            raise
176
+
177
+
178
class RegistryManagerContext:
    """Holds the manager, lock flag, and tracked config IDs for one registry type."""

    def __init__(self, manager):
        self._ids = []
        self._has_lock = False
        self._manager = manager

    def _manager_name(self):
        """Return the class name of the wrapped manager, used in log messages."""
        return self._manager.__class__.__name__

    def has_lock(self) -> bool:
        """Return True if the lock flag is set, i.e. set_locked() was called last."""
        return self._has_lock

    def set_locked(self):
        """Record that a lock has been acquired on the remote registry."""
        logger.debug("Locks acquired on remote registry for %s", self._manager_name())
        self._has_lock = True

    def set_unlocked(self):
        """Record that all locks on the remote registry have been released."""
        logger.debug("Locks released on remote registry for %s", self._manager_name())
        self._has_lock = False

    @property
    def ids(self):
        """Return the list of config IDs being managed (the internal list, not a copy)."""
        return self._ids

    @ids.setter
    def ids(self, val):
        """Replace the list of config IDs being managed."""
        self._ids = val

    @property
    def manager(self):
        """Return the RegistryManagerBase."""
        return self._manager

    @manager.setter
    def manager(self, val):
        """Replace the RegistryManagerBase."""
        self._manager = val

    @property
    def offline_mode(self):
        """Return the wrapped manager's offline_mode value."""
        return self._manager.offline_mode
@@ -0,0 +1,316 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ from dsgrid.config.mapping_tables import MappingTableConfig, MappingTableModel
5
+ from dsgrid.config.dataset_config import DatasetConfig, DatasetConfigModel
6
+ from dsgrid.config.dimension_config import DimensionConfig
7
+ from dsgrid.config.project_config import ProjectConfig
8
+ from dsgrid.exceptions import DSGInvalidParameter
9
+ from dsgrid.registry.registration_context import RegistrationContext
10
+ from dsgrid.registry.common import (
11
+ ConfigKey,
12
+ RegistryType,
13
+ VersionUpdateType,
14
+ )
15
+ from dsgrid.registry.registry_manager import RegistryManager
16
+
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class RegistryAutoUpdater:
    """Performs auto-updates on the registry."""

    def __init__(self, manager: RegistryManager) -> None:
        self._project_mgr = manager.project_manager
        self._dataset_mgr = manager.dataset_manager
        self._dimension_mgr = manager.dimension_manager
        self._dimension_mapping_mgr = manager.dimension_mapping_manager
        self._db = self._dimension_mapping_mgr.db

    def update_dependent_configs(
        self,
        config,
        original_version: str,
        update_type: VersionUpdateType,
        log_message: str,
        submitter: str | None = None,
    ):
        """Update all configs that consume this config. Recursive.
        This is an incomplete, experimental feature, and is subject to change.
        Should only be called by an admin that understands the consequences.
        Passing a dimension may trigger an update to a project and a dimension mapping.
        The change to that dimension mapping may trigger another update to the project.
        This guarantees that each config version will only be bumped once.

        It is up to the caller to ensure changes are synced to the remote registry if not in
        offline mode.

        Datasets likely need to be resubmitted to their projects.

        Parameters
        ----------
        config : ConfigBase
        original_version : str
            Original version of the config
        update_type : VersionUpdateType
        log_message : str
        submitter : str | None
            Passed through to RegistrationContext.
        """
        with RegistrationContext(
            self._project_mgr.db, log_message, update_type, submitter
        ) as context:
            return self.update_dependent_configs_with_context(config, original_version, context)

    def update_dependent_configs_with_context(
        self, config, original_version: str, context: RegistrationContext
    ):
        """Update all configs that consume this config, using an existing context.

        Dispatches on the config type (dimension, dimension mapping, or dataset).

        Raises
        ------
        NotImplementedError
            Raised if config is not one of the supported config types.
        """
        if isinstance(config, DimensionConfig):
            self._update_dimension_users(config, original_version, context)
        elif isinstance(config, MappingTableConfig):
            self._update_dimension_mapping_users(config, original_version, context)
        elif isinstance(config, DatasetConfig):
            self._update_dataset_users(config, original_version, context)
        else:
            msg = f"Updates of configs dependent on {type(config)}"
            raise NotImplementedError(msg)

    def _update_dimension_users(
        self,
        config: DimensionConfig,
        original_version: str,
        context: RegistrationContext,
    ):
        """Update the mappings, datasets, and projects that reference the dimension.

        Raises
        ------
        DSGInvalidParameter
            Raised if the dimension's current version equals original_version.
        """
        if config.model.version == original_version:
            msg = f"current version cannot be the same as the original: {original_version}"
            raise DSGInvalidParameter(msg)

        # Order is important because
        # - dimension mappings may have this dimension.
        # - datasets may have this dimension.
        # - projects may have this dimension as well as updated mappings and datasets.
        new_mappings = {}
        new_datasets = {}

        affected = self._db.get_containing_models(
            context.connection, config.model, version=original_version
        )
        for mapping in self._update_dimension_mappings_with_dimensions(
            affected, config, original_version
        ):
            # Key on the mapping's version before update_with_context bumps it.
            key = ConfigKey(mapping.model.mapping_id, mapping.model.version)
            new_mapping = self._dimension_mapping_mgr.update_with_context(mapping, context)
            assert key not in new_mappings
            new_mappings[key] = new_mapping
            logger.info(
                "Updated dimension mapping %s to %s as a result of dimension update",
                new_mapping.model.mapping_id,
                new_mapping.model.version,
            )
        for dataset in self._update_datasets_with_dimensions(affected, config, original_version):
            # Key on the dataset's version before update_with_context bumps it.
            key = ConfigKey(dataset.model.dataset_id, dataset.model.version)
            new_dataset = self._dataset_mgr.update_with_context(dataset, context)
            assert key not in new_datasets
            new_datasets[key] = new_dataset
            logger.info(
                "Updated dataset %s to %s as a result of dimension update",
                new_dataset.model.dataset_id,
                new_dataset.model.version,
            )

        self._update_projects(
            context,
            dimensions={ConfigKey(config.model.dimension_id, original_version): config},
            dimension_mappings=new_mappings,
            datasets=new_datasets,
        )

    def _update_dimension_mapping_users(
        self,
        config: MappingTableConfig,
        original_version: str,
        context: RegistrationContext,
    ) -> None:
        """Update the projects that reference the dimension mapping."""
        self._update_projects(
            context,
            dimension_mappings={ConfigKey(config.model.mapping_id, original_version): config},
        )

    def _update_dataset_users(
        self,
        config: DatasetConfig,
        original_version: str,
        context: RegistrationContext,
    ) -> None:
        """Update the projects that reference the dataset."""
        self._update_projects(
            context,
            datasets={ConfigKey(config.model.dataset_id, original_version): config},
        )

    def _update_dimension_mappings_with_dimensions(
        self,
        affected: dict[RegistryType, list[Any]],
        dim: DimensionConfig,
        original_version: str,
    ) -> list[MappingTableConfig]:
        """Point affected mappings at the new dimension version, in place.

        Returns the mapping configs whose from/to dimension reference was changed.
        They are not yet re-registered.
        """
        dimension_id = dim.model.dimension_id
        new_version = dim.model.version
        mapping_updates: list[MappingTableConfig] = []
        for model in affected[RegistryType.DIMENSION_MAPPING]:
            assert isinstance(model, MappingTableModel)
            config = self._dimension_mapping_mgr.get_by_id(model.mapping_id, model.version)
            updated = False
            # Check each side against its own version. Both sides are examined so a
            # mapping that references the dimension on both ends is fully updated.
            for ref in (config.model.from_dimension, config.model.to_dimension):
                if ref.dimension_id == dimension_id and ref.version == original_version:
                    ref.version = new_version
                    updated = True
            if updated:
                mapping_updates.append(config)

        return mapping_updates

    def _update_datasets_with_dimensions(
        self,
        affected: dict[RegistryType, list[Any]],
        dim: DimensionConfig,
        original_version: str,
    ) -> list[DatasetConfig]:
        """Point affected datasets at the new dimension version, in place.

        Returns the dataset configs with at least one changed dimension reference.
        They are not yet re-registered.
        """
        new_datasets = []
        for model in affected[RegistryType.DATASET]:
            assert isinstance(model, DatasetConfigModel)
            updated = False
            config = self._dataset_mgr.get_by_id(model.dataset_id, model.version)
            for ref in config.model.dimension_references:
                if ref.dimension_id == dim.model.dimension_id and ref.version == original_version:
                    ref.version = dim.model.version
                    updated = True
            if updated:
                new_datasets.append(config)

        return new_datasets

    def _get_project_config(
        self, updated_projects: dict[str, ProjectConfig], project_id: str
    ) -> ProjectConfig:
        """Return the already-edited project config if tracked, else fetch it by ID."""
        config = updated_projects.get(project_id)
        if config is None:
            # Only hit the project manager when the project is not tracked yet.
            config = self._project_mgr.get_by_id(project_id)
        return config

    def _update_projects(
        self,
        context: RegistrationContext,
        dimensions: dict[ConfigKey, DimensionConfig] | None = None,
        dimension_mappings: dict[ConfigKey, MappingTableConfig] | None = None,
        datasets: dict[ConfigKey, DatasetConfig] | None = None,
    ) -> None:
        """Update each project that consumes any of the given configs exactly once.

        Each mapping key is (config ID, original version); each value is the
        updated config.
        """
        updated_projects: dict[str, ProjectConfig] = {}
        if dimensions is not None:
            self._update_projects_with_new_dimensions(updated_projects, dimensions, context)
        if dimension_mappings is not None:
            self._update_projects_with_new_dimension_mappings(
                updated_projects, dimension_mappings, context
            )
        if datasets is not None:
            self._update_projects_with_new_datasets(updated_projects, datasets, context)

        for project_config in updated_projects.values():
            new_project = self._project_mgr.update_with_context(project_config, context)
            logger.info(
                "Updated project %s to %s as a result of dependent config updates.",
                new_project.model.project_id,
                new_project.model.version,
            )
            # TODO: Re-submit changed datasets to projects. dataset-to-project mappings might
            # take some work.

    def _update_projects_with_new_dimensions(
        self,
        updated_projects: dict[str, ProjectConfig],
        dimensions: dict[ConfigKey, DimensionConfig],
        context: RegistrationContext,
    ) -> None:
        """Updates the latest project configurations in place if they consume the dimensions.
        Edits updated_projects as necessary.
        """
        for key, dim in dimensions.items():
            for model in self._db.get_containing_models(
                context.connection,
                dim.model,
                version=key.version,
                parent_model_type=RegistryType.PROJECT,
            )[RegistryType.PROJECT]:
                config = self._get_project_config(updated_projects, model.project_id)
                updated = False
                for ref in config.model.dimensions.base_dimension_references:
                    if ref.dimension_id == dim.model.dimension_id and ref.version == key.version:
                        ref.version = dim.model.version
                        updated = True
                        break
                for ref in config.model.dimensions.supplemental_dimension_references:
                    if ref.dimension_id == dim.model.dimension_id and ref.version == key.version:
                        ref.version = dim.model.version
                        updated = True
                        break
                if updated and config.model.project_id not in updated_projects:
                    updated_projects[config.model.project_id] = config

    def _update_projects_with_new_dimension_mappings(
        self,
        updated_projects: dict[str, ProjectConfig],
        mappings: dict[ConfigKey, MappingTableConfig],
        context: RegistrationContext,
    ) -> None:
        """Updates the latest project configurations in place if they consume the mappings.
        Edits updated_projects as necessary.
        """
        for key, mapping in mappings.items():
            for model in self._db.get_containing_models(
                context.connection,
                mapping.model,
                version=key.version,
                parent_model_type=RegistryType.PROJECT,
            )[RegistryType.PROJECT]:
                config = self._get_project_config(updated_projects, model.project_id)
                updated = False
                for ref in config.model.dimension_mappings.base_to_supplemental_references:
                    if ref.mapping_id == mapping.model.mapping_id and ref.version == key.version:
                        ref.version = mapping.model.version
                        updated = True
                        break
                # NOTE(review): this loop compares a dimension ID against a mapping ID
                # and would write a mapping version into a dimension reference. It looks
                # like a copy-paste from the dimension variant; confirm which mapping
                # reference list was intended. Behavior is left unchanged here.
                for ref in config.model.dimensions.supplemental_dimension_references:
                    if ref.dimension_id == mapping.model.mapping_id and ref.version == key.version:
                        ref.version = mapping.model.version
                        updated = True
                        break
                if updated and config.model.project_id not in updated_projects:
                    updated_projects[config.model.project_id] = config

    def _update_projects_with_new_datasets(
        self,
        updated_projects: dict[str, ProjectConfig],
        datasets: dict[ConfigKey, DatasetConfig],
        context: RegistrationContext,
    ) -> None:
        """Updates the latest project configurations in place if they consume the datasets.
        Edits updated_projects as necessary.
        """
        for key, dataset in datasets.items():
            for model in self._db.get_containing_models(
                context.connection,
                dataset.model,
                version=key.version,
                parent_model_type=RegistryType.PROJECT,
            )[RegistryType.PROJECT]:
                config = self._get_project_config(updated_projects, model.project_id)
                updated = False
                for dataset_ in config.model.datasets:
                    if (
                        dataset_.dataset_id == dataset.model.dataset_id
                        and dataset_.version == key.version
                    ):
                        dataset_.version = dataset.model.version
                        updated = True
                        break
                if updated and config.model.project_id not in updated_projects:
                    updated_projects[config.model.project_id] = config