dsgrid-toolkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dsgrid-toolkit might be problematic. Click here for more details.
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +420 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +22 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +177 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +142 -0
- dsgrid/cli/dsgrid_admin.py +349 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +711 -0
- dsgrid/cli/registry.py +1773 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +35 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +187 -0
- dsgrid/config/common.py +131 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +684 -0
- dsgrid/config/dataset_schema_handler_factory.py +41 -0
- dsgrid/config/date_time_dimension_config.py +108 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +349 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +775 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/index_time_dimension_config.py +76 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1457 -0
- dsgrid/config/registration_models.py +199 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +200 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +899 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +196 -0
- dsgrid/dataset/dataset_schema_handler_standard.py +303 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +44 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +218 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +213 -0
- dsgrid/dimension/time.py +531 -0
- dsgrid/dimension/time_utils.py +88 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +950 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +384 -0
- dsgrid/query/models.py +726 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +847 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +161 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +69 -0
- dsgrid/registry/dataset_config_generator.py +156 -0
- dsgrid/registry/dataset_registry_manager.py +734 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +185 -0
- dsgrid/registry/filesystem_data_store.py +141 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1616 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +662 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +544 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +545 -0
- dsgrid/spark/types.py +50 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +139 -0
- dsgrid/tests/make_us_data_registry.py +204 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +612 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +64 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +184 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.2.0.dist-info/METADATA +216 -0
- dsgrid_toolkit-0.2.0.dist-info/RECORD +152 -0
- dsgrid_toolkit-0.2.0.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.2.0.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.2.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import getpass
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from IPython.display import display, HTML
|
|
9
|
+
import ipywidgets as widgets
|
|
10
|
+
|
|
11
|
+
from dsgrid.common import REMOTE_REGISTRY, LOCAL_REGISTRY
|
|
12
|
+
from dsgrid.exceptions import DSGBaseException
|
|
13
|
+
from dsgrid.registry.registry_database import DatabaseConnection
|
|
14
|
+
from dsgrid.registry.registry_manager import RegistryManager
|
|
15
|
+
from dsgrid.loggers import setup_logging
|
|
16
|
+
from dsgrid.spark.types import SparkSession
|
|
17
|
+
from dsgrid.utils.spark import init_spark
|
|
18
|
+
|
|
19
|
+
SS_PROJECT = "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project/project.json5"
|
|
20
|
+
RS_DATASET = "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project/datasets/modeled/resstock/dataset.json5"
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RegistrationGui:
|
|
26
|
+
"""Provides a UI for registering dsgrid projects and datasets."""
|
|
27
|
+
|
|
28
|
+
DEFAULTS = {
|
|
29
|
+
"remote_registry": REMOTE_REGISTRY,
|
|
30
|
+
"local_registry": LOCAL_REGISTRY,
|
|
31
|
+
"project_file": "",
|
|
32
|
+
"dataset_file": "",
|
|
33
|
+
"dataset_path": "",
|
|
34
|
+
"dimension_mapping_file": "",
|
|
35
|
+
"dimensions_filter": "",
|
|
36
|
+
"log_file": Path(os.environ.get("DSGRID_LOG_FILE_PATH", ".")) / "dsgrid.log",
|
|
37
|
+
"log_message": "",
|
|
38
|
+
"spark_cluster": os.environ.get("SPARK_CLUSTER", "local mode"),
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
def __init__(self, defaults=None):
    """Build and display the registration GUI.

    Parameters
    ----------
    defaults : dict | None
        Optional overrides for the entries in ``DEFAULTS``. The class-level
        dictionary is deep-copied first, so it is never mutated.
    """
    initial_values = copy.deepcopy(self.DEFAULTS)
    if defaults is not None:
        initial_values.update(defaults)

    # No registry is loaded until the user clicks "Load registry".
    self._manager = None
    self._defaults = initial_values
    # The leading empty string is the "no project selected" dropdown entry.
    self._project_ids = [""]

    self._make_widgets()
    self._display_widgets()
    self._tables_out = widgets.Output()
|
|
50
|
+
|
|
51
|
+
@property
def manager(self):
    """Return the registry manager, or None if no registry has been loaded."""
    registry_manager = self._manager
    return registry_manager
|
|
54
|
+
|
|
55
|
+
@property
def dimension_manager(self):
    """Return the ``dimension_manager`` attribute of the registry manager."""
    registry_manager = self._manager
    return registry_manager.dimension_manager
|
|
58
|
+
|
|
59
|
+
@property
def dimension_mapping_manager(self):
    """Return the ``dimension_mapping_manager`` attribute of the registry manager."""
    registry_manager = self._manager
    return registry_manager.dimension_mapping_manager
|
|
62
|
+
|
|
63
|
+
@property
def dataset_manager(self):
    """Return the ``dataset_manager`` attribute of the registry manager."""
    registry_manager = self._manager
    return registry_manager.dataset_manager
|
|
66
|
+
|
|
67
|
+
@property
def project_manager(self):
    """Return the ``project_manager`` attribute of the registry manager."""
    registry_manager = self._manager
    return registry_manager.project_manager
|
|
70
|
+
|
|
71
|
+
def _make_widgets(self):
    """Create every widget used by the GUI and attach the event handlers.

    Initial text values come from ``self._defaults``. All registry-dependent
    controls start disabled; they are enabled by ``_enable_manager_actions``.
    """
    defaults = self._defaults

    self._main_label = widgets.HTML(value="<b>dsgrid Registration Tool</b>")
    wide_layout = widgets.Layout(width="400px")
    action_layout = widgets.Layout(width="200px")

    # --- Registry connection settings ---
    self._remote_path_text = widgets.Text(
        value=str(defaults["remote_registry"]),
        description="Remote registry",
        layout=wide_layout,
    )
    self._local_path_text = widgets.Text(
        value=str(defaults["local_registry"]),
        description="Local registry",
        layout=wide_layout,
    )
    self._spark_cluster_text = widgets.Text(
        value=defaults["spark_cluster"],
        description="Spark cluster",
        layout=wide_layout,
    )
    # TODO: setup detection of changes to this text box and reconfigure logging
    self._log_file_text = widgets.Text(
        value=str(defaults["log_file"]),
        description="Log file",
        layout=wide_layout,
    )
    self._online_mode_cbox = widgets.Checkbox(value=False, description="Online mode")
    self._online_mode_cbox.observe(self._on_online_click, names="value")
    self._sync_cbox = widgets.Checkbox(value=True, description="Sync pull")
    self._load_btn = widgets.Button(description="Load registry", layout=action_layout)
    self._load_btn.on_click(self._on_load_click)

    # --- Project registration ---
    self._register_project_btn = widgets.Button(
        description="Register project",
        disabled=True,
        layout=action_layout,
    )
    self._register_project_btn.on_click(self._on_register_project_click)
    self._project_file_text = widgets.Text(
        value=str(defaults["project_file"]),
        description="Project File",
        placeholder="project.json5",
    )
    self._project_file_ex = widgets.HTML(
        value=f"<a href={SS_PROJECT} target='_blank'>Example: Standard Scenarios</a>"
    )

    # --- Dataset registration and submission ---
    self._register_and_submit_dataset_btn = widgets.Button(
        description="Register and submit dataset",
        disabled=True,
        layout=action_layout,
    )
    self._register_and_submit_dataset_btn.on_click(self._on_register_and_submit_dataset_click)
    self._dataset_file_ex = widgets.HTML(
        value=f"<a href={RS_DATASET} target='_blank'>Example: ResStock</a>"
    )
    self._dataset_file_text = widgets.Text(
        value=str(defaults["dataset_file"]),
        description="Dataset File",
        placeholder="dataset.json5",
    )
    self._dataset_path_text = widgets.Text(
        value=defaults["dataset_path"],
        description="Dataset Path",
        placeholder="load_data_path",
    )
    self._dimension_mapping_label = widgets.HTML(value="Dimension mapping file")
    self._dimension_mapping_text = widgets.Text(
        value=str(defaults["dimension_mapping_file"]),
        placeholder="dimension_mappings.json5",
    )
    self._dataset_project_id_dd = widgets.Dropdown(
        description="Project ID",
        options=self._project_ids,
        value=self._project_ids[0],
        disabled=True,
    )
    self._log_message_label = widgets.HTML(value="Registration log message")
    self._log_message_text = widgets.Text(
        value=defaults["log_message"],
        layout=widgets.Layout(width="400px"),
    )

    # --- Table display controls ---
    self._show_projects_btn = widgets.Button(
        disabled=True,
        description="Show projects",
        tooltip="Display a table showing all registered projects",
    )
    self._show_projects_btn.on_click(self._on_show_projects_click)
    self._show_datasets_btn = widgets.Button(
        disabled=True,
        description="Show datasets",
        tooltip="Display a table showing all registered datasets",
    )
    self._show_datasets_btn.on_click(self._on_show_datasets_click)
    self._show_dimensions_btn = widgets.Button(
        disabled=True,
        description="Show dimensions",
        tooltip="Display a table showing all registered dimensions",
    )
    self._show_dimensions_btn.on_click(self._on_show_dimensions_click)
    self._dim_filter_message_text = widgets.HTML(value="Filter dimensions")
    self._dimensions_filter_text = widgets.Text(
        value=defaults["dimensions_filter"],
        placeholder="Type == geography",
    )
    self._project_dimensions_filter_text = widgets.HTML(value="Filter dimensions by project")
    self._project_dimensions_filter_dd = widgets.Dropdown(
        options=self._project_ids,
        value=self._project_ids[0],
        disabled=True,
    )
    self._show_dimension_mappings_btn = widgets.Button(
        disabled=True,
        description="Show mappings",
        tooltip="Display a table showing all registered dimension mappings",
    )
    self._show_dimension_mappings_btn.on_click(self._on_show_dimension_mappings_click)

    # --- Reset controls ---
    self._reset_tables_btn = widgets.Button(description="Reset tables")
    self._reset_tables_btn.on_click(self._reset_tables_click)
    self._reset_btn = widgets.Button(description="Reset all")
    self._reset_btn.on_click(self._on_reset_click)

    # Per-table HTML widgets (projects, datasets, dimensions, dimension
    # mappings) are intentionally not created: those tables are not
    # well-formed when rendered that way. Tables go through _display_table.
|
|
195
|
+
|
|
196
|
+
def _display_widgets(self):
    """Arrange the widgets into boxes and display the whole GUI."""
    # Registry settings on the left, mode checkboxes on the right.
    registry_box = widgets.VBox(
        (
            self._remote_path_text,
            self._local_path_text,
            self._spark_cluster_text,
            self._log_file_text,
        )
    )
    options_box = widgets.VBox((self._online_mode_cbox, self._sync_cbox))

    # Registration controls.
    register_project_box = widgets.HBox(
        (self._register_project_btn, self._project_file_text, self._project_file_ex)
    )
    dataset_file_row = widgets.HBox((self._dataset_file_text, self._dataset_file_ex))
    mapping_row = widgets.HBox((self._dimension_mapping_label, self._dimension_mapping_text))
    dataset_fields = widgets.VBox(
        (
            dataset_file_row,
            self._dataset_path_text,
            mapping_row,
            self._dataset_project_id_dd,
        )
    )
    register_and_submit_dataset_box = widgets.HBox(
        (self._register_and_submit_dataset_btn, dataset_fields)
    )
    log_box = widgets.HBox((self._log_message_label, self._log_message_text))
    register_box = widgets.VBox(
        (register_project_box, register_and_submit_dataset_box, log_box)
    )

    # Table display controls.
    show_dims_box = widgets.HBox(
        (
            self._show_dimensions_btn,
            self._dim_filter_message_text,
            self._dimensions_filter_text,
            self._project_dimensions_filter_text,
            self._project_dimensions_filter_dd,
        )
    )
    show_box = widgets.VBox(
        (
            self._show_projects_btn,
            self._show_datasets_btn,
            show_dims_box,
            self._show_dimension_mappings_btn,
            self._reset_tables_btn,
        )
    )

    top_row = widgets.HBox((registry_box, options_box))
    display(
        self._main_label,
        top_row,
        self._load_btn,
        register_box,
        show_box,
        self._reset_btn,
    )
|
|
261
|
+
|
|
262
|
+
def _enable_manager_actions(self):
    """Enable the registry-dependent controls and refresh the project IDs.

    Must only be called once ``self._manager`` is set, because the project
    IDs and tables are read from it.
    """
    registry_dependent = (
        self._register_project_btn,
        self._register_and_submit_dataset_btn,
        self._dataset_project_id_dd,
        self._show_projects_btn,
        self._show_datasets_btn,
        self._show_dimensions_btn,
        self._show_dimension_mappings_btn,
        self._project_dimensions_filter_dd,
    )
    for control in registry_dependent:
        control.disabled = False

    self._update_project_ids()

    # Run the two "show" handlers inside an Output widget that is never
    # displayed, then clear it.
    # NOTE(review): presumably this warms up the managers without showing
    # the tables -- confirm the intent.
    scratch = widgets.Output()
    with scratch:
        self._on_show_projects_click(self._show_projects_btn)
        self._on_show_datasets_click(self._show_datasets_btn)
    scratch.clear_output()
|
|
277
|
+
|
|
278
|
+
def _on_online_click(self, _):
|
|
279
|
+
# Syncing is always enabled when in online mode.
|
|
280
|
+
if self._online_mode_cbox.value:
|
|
281
|
+
self._sync_cbox.value = True
|
|
282
|
+
self._sync_cbox.disabled = self._online_mode_cbox.value
|
|
283
|
+
|
|
284
|
+
def _on_load_click(self, _):
    """Load the registry described by the text boxes and enable the GUI.

    Starts Spark first when a cluster other than local mode is requested and
    no session is active. On a dsgrid error the failure is logged and the GUI
    stays in its current state.
    """
    # TODO: We should log to an Output widget that gets updated periodically.
    logger = setup_logging(__name__, self._log_file_text.value, mode="a")

    cluster = self._spark_cluster_text.value
    if cluster not in ("local mode", "") and SparkSession.getActiveSession() is None:
        os.environ["SPARK_CLUSTER"] = cluster
        # Run init_spark inside an undisplayed Output widget and clear it.
        spark_out = widgets.Output()
        with spark_out:
            init_spark()
        spark_out.clear_output()

    sync = self._sync_cbox.value
    offline = not self._online_mode_cbox.value
    conn = DatabaseConnection()
    try:
        if sync and offline:
            # This exists only to sync data locally.
            RegistryManager.load(
                conn,
                remote_path=self._remote_path_text.value,
                offline_mode=False,
                user=getpass.getuser(),
            )
        self._manager = RegistryManager.load(
            conn,
            remote_path=self._remote_path_text.value,
            offline_mode=offline,
            user=getpass.getuser(),
        )
    except DSGBaseException:
        logger.exception("Failed to load registry %s", self._local_path_text.value)
        return

    self._enable_manager_actions()
|
|
320
|
+
|
|
321
|
+
def _update_project_ids(self):
|
|
322
|
+
self._project_ids[1:] = self._manager.project_manager.list_ids()
|
|
323
|
+
self._project_dimensions_filter_dd.options = self._project_ids
|
|
324
|
+
self._project_dimensions_filter_dd.value = self._project_ids[0]
|
|
325
|
+
self._dataset_project_id_dd.options = self._project_ids
|
|
326
|
+
self._dataset_project_id_dd.value = self._project_ids[0]
|
|
327
|
+
|
|
328
|
+
def _on_register_project_click(self, _):
|
|
329
|
+
project_file = Path(self._project_file_text.value)
|
|
330
|
+
if str(project_file) == "":
|
|
331
|
+
print("project_file cannot be empty", file=sys.stderr)
|
|
332
|
+
return
|
|
333
|
+
if not self._registration_pre_check():
|
|
334
|
+
return
|
|
335
|
+
try:
|
|
336
|
+
self._manager.project_manager.register(
|
|
337
|
+
project_file, submitter=getpass.getuser(), log_message=self._log_message_text.value
|
|
338
|
+
)
|
|
339
|
+
except DSGBaseException:
|
|
340
|
+
logger.exception("Failed to register project %s", project_file)
|
|
341
|
+
return
|
|
342
|
+
|
|
343
|
+
self._update_project_ids()
|
|
344
|
+
self._post_registration_handling()
|
|
345
|
+
|
|
346
|
+
def _on_register_and_submit_dataset_click(self, _):
|
|
347
|
+
dataset_file = Path(self._dataset_file_text.value)
|
|
348
|
+
if str(dataset_file) == "":
|
|
349
|
+
print("dataset_file cannot be empty", file=sys.stderr)
|
|
350
|
+
return
|
|
351
|
+
dataset_path = Path(self._dataset_path_text.value)
|
|
352
|
+
if str(dataset_path) == "":
|
|
353
|
+
print("dataset_path cannot be empty", file=sys.stderr)
|
|
354
|
+
return
|
|
355
|
+
dimension_mapping_file = Path(self._dimension_mapping_text.value)
|
|
356
|
+
if str(dimension_mapping_file) == "":
|
|
357
|
+
dimension_mapping_file = None
|
|
358
|
+
project_id = self._dataset_project_id_dd.value
|
|
359
|
+
if project_id == "":
|
|
360
|
+
print("project_id cannot be empty", file=sys.stderr)
|
|
361
|
+
return
|
|
362
|
+
if not self._registration_pre_check():
|
|
363
|
+
return
|
|
364
|
+
try:
|
|
365
|
+
self._manager.project_manager.register_and_submit_dataset(
|
|
366
|
+
dataset_file,
|
|
367
|
+
dataset_path,
|
|
368
|
+
project_id,
|
|
369
|
+
dimension_mapping_file=dimension_mapping_file,
|
|
370
|
+
submitter=getpass.getuser(),
|
|
371
|
+
log_message=self._log_message_text.value,
|
|
372
|
+
)
|
|
373
|
+
except DSGBaseException:
|
|
374
|
+
logger.exception("Failed to register and submit dataset %s", dataset_file)
|
|
375
|
+
return
|
|
376
|
+
|
|
377
|
+
self._post_registration_handling()
|
|
378
|
+
self._update_project_ids()
|
|
379
|
+
|
|
380
|
+
def _registration_pre_check(self):
|
|
381
|
+
log_message = self._log_message_text.value
|
|
382
|
+
if log_message == "":
|
|
383
|
+
print("log_message cannot be empty", file=sys.stderr)
|
|
384
|
+
return False
|
|
385
|
+
return True
|
|
386
|
+
|
|
387
|
+
def _post_registration_handling(self):
|
|
388
|
+
self._log_message_text.value = ""
|
|
389
|
+
|
|
390
|
+
def _on_show_projects_click(self, _):
|
|
391
|
+
table = self._manager.project_manager.show(return_table=True)
|
|
392
|
+
# self._project_table.value = table.get_html_string()
|
|
393
|
+
self._display_table("Projects", table)
|
|
394
|
+
|
|
395
|
+
def _on_show_datasets_click(self, _):
|
|
396
|
+
table = self._manager.dataset_manager.show(return_table=True)
|
|
397
|
+
# self._dataset_table.value = table.get_html_string()
|
|
398
|
+
self._display_table("Datasets", table)
|
|
399
|
+
|
|
400
|
+
def _on_show_dimensions_click(self, _):
|
|
401
|
+
filters = [self._dimensions_filter_text.value]
|
|
402
|
+
if filters == [""]:
|
|
403
|
+
filters = None
|
|
404
|
+
project_id = self._project_dimensions_filter_dd.value
|
|
405
|
+
if project_id == "":
|
|
406
|
+
dimension_ids = None
|
|
407
|
+
else:
|
|
408
|
+
project_config = self._manager.project_manager.get_by_id(project_id)
|
|
409
|
+
dimension_ids = {x.id for x in project_config.base_dimensions}
|
|
410
|
+
for key in project_config.supplemental_dimensions:
|
|
411
|
+
dimension_ids.add(key.id)
|
|
412
|
+
|
|
413
|
+
table = self._manager.dimension_manager.show(
|
|
414
|
+
filters=filters, dimension_ids=dimension_ids, return_table=True
|
|
415
|
+
)
|
|
416
|
+
self._display_table("Dimensions", table)
|
|
417
|
+
|
|
418
|
+
def _display_table(self, name, table):
    """Replace the currently shown table with ``table`` under a bold title.

    Parameters
    ----------
    name : str
        Title rendered above the table.
    table
        Object providing ``get_html_string()``.
    """
    # Clear the previous output, then render into a fresh Output widget.
    self._tables_out.clear_output()
    self._tables_out = widgets.Output()
    title_html = f"<b>{name}</b>"
    with self._tables_out:
        display(HTML(title_html))
        display(HTML(table.get_html_string()))
    display(self._tables_out)
|
|
425
|
+
|
|
426
|
+
def _on_show_dimension_mappings_click(self, _):
|
|
427
|
+
table = self._manager.dimension_mapping_manager.show(return_table=True)
|
|
428
|
+
# self._dimension_mapping_table.value = table.get_html_string()
|
|
429
|
+
self._display_table("Dimension Mappings", table)
|
|
430
|
+
|
|
431
|
+
def _reset_tables_click(self, _):
|
|
432
|
+
# self._project_table.value = ""
|
|
433
|
+
# self._dataset_table.value = ""
|
|
434
|
+
# self._dimension_table.value = ""
|
|
435
|
+
# self._dimension_mapping_table.value = ""
|
|
436
|
+
self._tables_out.clear_output()
|
|
437
|
+
|
|
438
|
+
def _on_reset_click(self, _):
    """Close the current widgets and rebuild the GUI from the defaults.

    The registry-dependent controls are re-enabled only when a registry has
    already been loaded; otherwise the rebuilt GUI starts in its initial,
    disabled state.
    """
    for val in self.__dict__.values():
        if isinstance(val, widgets.Widget):
            # NOTE(review): ipywidgets documents close_all() as closing every
            # live widget, not just `val` -- confirm that is intended.
            val.close_all()
    self._make_widgets()
    self._display_widgets()
    # The previous tables output was closed above; give the table handlers a
    # live widget to clear and replace.
    self._tables_out = widgets.Output()
    # Enabling the actions reads project IDs from the manager, which is still
    # None when "Reset all" is clicked before "Load registry".
    if self._manager is not None:
        self._enable_manager_actions()
|
dsgrid/chronify.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Generator
|
|
4
|
+
|
|
5
|
+
import chronify
|
|
6
|
+
|
|
7
|
+
import dsgrid
|
|
8
|
+
from dsgrid.common import BackendEngine
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@contextmanager
def create_store(store_file: Path) -> Generator[chronify.Store, None, None]:
    """Yield a chronify Store chosen by the dsgrid runtime configuration.

    With the Spark backend a new Hive store is created from the configured
    thrift server URL and ``store_file`` is not used; otherwise a file-based
    database is created at ``store_file``. The store is disposed when the
    context exits, even on error.
    """
    runtime = dsgrid.runtime_config
    use_spark = runtime.backend_engine == BackendEngine.SPARK
    store = (
        chronify.Store.create_new_hive_store(runtime.thrift_server_url)
        if use_spark
        else chronify.Store.create_file_db(store_file)
    )
    try:
        yield store
    finally:
        store.dispose()
|
dsgrid/cli/__init__.py
ADDED
|
File without changes
|
dsgrid/cli/common.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import rich_click as click
|
|
7
|
+
|
|
8
|
+
from dsgrid.dsgrid_rc import DsgridRuntimeConfig
|
|
9
|
+
from dsgrid.exceptions import DSGBaseException
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def check_output_directory(path: Path, fs_interface, force: bool):
    """Ensures that the parameter path is an empty directory.

    Parameters
    ----------
    path : Path
    fs_interface : FilesystemInterface
        Used to remove existing content via ``rm_tree`` when ``force`` is True.
    force : bool
        If False and the directory exists and has content, exit.

    """
    if path.exists():
        # iterdir() returns an iterator, which is always truthy; any() is
        # needed to test whether the directory actually has entries.
        if path.is_dir() and not any(path.iterdir()):
            return
        if force:
            fs_interface.rm_tree(path)
        else:
            print(
                f"{path} already exists. Choose a different name or pass --force to overwrite it.",
                file=sys.stderr,
            )
            sys.exit(1)

    path.mkdir()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_log_level_from_str(level):
    """Convert a lowercase log level name to the matching logging constant.

    Supported names are "debug", "info", "warning" and "error"; anything else
    raises Exception.
    """
    known_levels = (
        ("debug", logging.DEBUG),
        ("info", logging.INFO),
        ("warning", logging.WARNING),
        ("error", logging.ERROR),
    )
    for name, value in known_levels:
        if level == name:
            return value

    msg = f"Unsupported level={level}"
    raise Exception(msg)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_value_from_context(ctx, field) -> Any:
    """Look up ``field`` in the parameters of the root click context."""
    root_params = ctx.find_root().params
    return root_params[field]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def handle_dsgrid_exception(ctx, func, *args, **kwargs) -> tuple[Any, int]:
    """Handle any dsgrid exceptions as specified by the CLI parameters.

    Calls ``func(*args, **kwargs)`` and returns ``(result, 0)`` on success.
    A DSGBaseException is logged with the file and line where it was raised;
    it is re-raised when the root context's ``reraise_exceptions`` parameter
    is truthy, otherwise ``(None, 1)`` is returned. Other exception types
    propagate unchanged.
    """
    res = None
    try:
        res = func(*args, **kwargs)
        return res, 0
    except DSGBaseException:
        exc_type, exc_value, exc_tb = sys.exc_info()
        # The first traceback entry is this function's own frame. Follow
        # tb_next to the innermost entry so the log names the raise site.
        while exc_tb.tb_next is not None:
            exc_tb = exc_tb.tb_next
        filename = exc_tb.tb_frame.f_code.co_filename
        line = exc_tb.tb_lineno
        msg = f'{func.__name__} failed: exception={exc_type.__name__} message="{exc_value}" {filename=} {line=}'
        logger.error(msg)
        if ctx.find_root().params["reraise_exceptions"]:
            raise
        return res, 1
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def handle_scratch_dir(*args):
    """Handle the user input for scratch_dir. If a path is passed, ensure it exists.

    The value is read from the third positional argument. Returns None when
    no value was given, otherwise the value as a Path.

    Raises
    ------
    ValueError
        If a path was given but does not exist.
    """
    val = args[2]
    if val is None:
        return val
    path = Path(val)
    # exists is a method; the bare attribute is always truthy.
    if not path.exists():
        msg = f"scratch-dir={path} does not exist"
        raise ValueError(msg)
    return path
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def path_callback(*args) -> Path | None:
|
|
91
|
+
"""Ensure that a Path CLI option value is returned as a Path object."""
|
|
92
|
+
val = args[2]
|
|
93
|
+
if val is None:
|
|
94
|
+
return val
|
|
95
|
+
return Path(val)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Copied from
|
|
99
|
+
# https://stackoverflow.com/questions/45868549/creating-a-click-option-with-prompt-that-shows-only-if-default-value-is-empty
|
|
100
|
+
# and modified for our desired password behavior.
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class OptionPromptPassword(click.Option):
    """Password option that prompts only when the username was overridden.

    If the username in the root context matches the one in the runtime config
    file, the stored password is used and no prompt is shown.
    """

    def get_default(self, ctx, **kwargs):
        """Return the configured password, or None if the username differs."""
        runtime_config = DsgridRuntimeConfig.load()
        requested_user = ctx.find_root().params.get("username")
        if requested_user == runtime_config.database_user:
            return runtime_config.database_password
        return None

    def prompt_for_value(self, ctx):
        """Return the stored password, prompting only when none applies."""
        stored_password = self.get_default(ctx)
        if stored_password is not None:
            return stored_password
        return super().prompt_for_value(ctx)
|