dsgrid-toolkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dsgrid-toolkit might be problematic. Click here for more details.

Files changed (152) hide show
  1. dsgrid/__init__.py +22 -0
  2. dsgrid/api/__init__.py +0 -0
  3. dsgrid/api/api_manager.py +179 -0
  4. dsgrid/api/app.py +420 -0
  5. dsgrid/api/models.py +60 -0
  6. dsgrid/api/response_models.py +116 -0
  7. dsgrid/apps/__init__.py +0 -0
  8. dsgrid/apps/project_viewer/app.py +216 -0
  9. dsgrid/apps/registration_gui.py +444 -0
  10. dsgrid/chronify.py +22 -0
  11. dsgrid/cli/__init__.py +0 -0
  12. dsgrid/cli/common.py +120 -0
  13. dsgrid/cli/config.py +177 -0
  14. dsgrid/cli/download.py +13 -0
  15. dsgrid/cli/dsgrid.py +142 -0
  16. dsgrid/cli/dsgrid_admin.py +349 -0
  17. dsgrid/cli/install_notebooks.py +62 -0
  18. dsgrid/cli/query.py +711 -0
  19. dsgrid/cli/registry.py +1773 -0
  20. dsgrid/cloud/__init__.py +0 -0
  21. dsgrid/cloud/cloud_storage_interface.py +140 -0
  22. dsgrid/cloud/factory.py +31 -0
  23. dsgrid/cloud/fake_storage_interface.py +37 -0
  24. dsgrid/cloud/s3_storage_interface.py +156 -0
  25. dsgrid/common.py +35 -0
  26. dsgrid/config/__init__.py +0 -0
  27. dsgrid/config/annual_time_dimension_config.py +187 -0
  28. dsgrid/config/common.py +131 -0
  29. dsgrid/config/config_base.py +148 -0
  30. dsgrid/config/dataset_config.py +684 -0
  31. dsgrid/config/dataset_schema_handler_factory.py +41 -0
  32. dsgrid/config/date_time_dimension_config.py +108 -0
  33. dsgrid/config/dimension_config.py +54 -0
  34. dsgrid/config/dimension_config_factory.py +65 -0
  35. dsgrid/config/dimension_mapping_base.py +349 -0
  36. dsgrid/config/dimension_mappings_config.py +48 -0
  37. dsgrid/config/dimensions.py +775 -0
  38. dsgrid/config/dimensions_config.py +71 -0
  39. dsgrid/config/index_time_dimension_config.py +76 -0
  40. dsgrid/config/input_dataset_requirements.py +31 -0
  41. dsgrid/config/mapping_tables.py +209 -0
  42. dsgrid/config/noop_time_dimension_config.py +42 -0
  43. dsgrid/config/project_config.py +1457 -0
  44. dsgrid/config/registration_models.py +199 -0
  45. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  46. dsgrid/config/simple_models.py +49 -0
  47. dsgrid/config/supplemental_dimension.py +29 -0
  48. dsgrid/config/time_dimension_base_config.py +200 -0
  49. dsgrid/data_models.py +155 -0
  50. dsgrid/dataset/__init__.py +0 -0
  51. dsgrid/dataset/dataset.py +123 -0
  52. dsgrid/dataset/dataset_expression_handler.py +86 -0
  53. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  54. dsgrid/dataset/dataset_schema_handler_base.py +899 -0
  55. dsgrid/dataset/dataset_schema_handler_one_table.py +196 -0
  56. dsgrid/dataset/dataset_schema_handler_standard.py +303 -0
  57. dsgrid/dataset/growth_rates.py +162 -0
  58. dsgrid/dataset/models.py +44 -0
  59. dsgrid/dataset/table_format_handler_base.py +257 -0
  60. dsgrid/dataset/table_format_handler_factory.py +17 -0
  61. dsgrid/dataset/unpivoted_table.py +121 -0
  62. dsgrid/dimension/__init__.py +0 -0
  63. dsgrid/dimension/base_models.py +218 -0
  64. dsgrid/dimension/dimension_filters.py +308 -0
  65. dsgrid/dimension/standard.py +213 -0
  66. dsgrid/dimension/time.py +531 -0
  67. dsgrid/dimension/time_utils.py +88 -0
  68. dsgrid/dsgrid_rc.py +88 -0
  69. dsgrid/exceptions.py +105 -0
  70. dsgrid/filesystem/__init__.py +0 -0
  71. dsgrid/filesystem/cloud_filesystem.py +32 -0
  72. dsgrid/filesystem/factory.py +32 -0
  73. dsgrid/filesystem/filesystem_interface.py +136 -0
  74. dsgrid/filesystem/local_filesystem.py +74 -0
  75. dsgrid/filesystem/s3_filesystem.py +118 -0
  76. dsgrid/loggers.py +132 -0
  77. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +950 -0
  78. dsgrid/notebooks/registration.ipynb +48 -0
  79. dsgrid/notebooks/start_notebook.sh +11 -0
  80. dsgrid/project.py +451 -0
  81. dsgrid/query/__init__.py +0 -0
  82. dsgrid/query/dataset_mapping_plan.py +142 -0
  83. dsgrid/query/derived_dataset.py +384 -0
  84. dsgrid/query/models.py +726 -0
  85. dsgrid/query/query_context.py +287 -0
  86. dsgrid/query/query_submitter.py +847 -0
  87. dsgrid/query/report_factory.py +19 -0
  88. dsgrid/query/report_peak_load.py +70 -0
  89. dsgrid/query/reports_base.py +20 -0
  90. dsgrid/registry/__init__.py +0 -0
  91. dsgrid/registry/bulk_register.py +161 -0
  92. dsgrid/registry/common.py +287 -0
  93. dsgrid/registry/config_update_checker_base.py +63 -0
  94. dsgrid/registry/data_store_factory.py +34 -0
  95. dsgrid/registry/data_store_interface.py +69 -0
  96. dsgrid/registry/dataset_config_generator.py +156 -0
  97. dsgrid/registry/dataset_registry_manager.py +734 -0
  98. dsgrid/registry/dataset_update_checker.py +16 -0
  99. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  100. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  101. dsgrid/registry/dimension_registry_manager.py +413 -0
  102. dsgrid/registry/dimension_update_checker.py +16 -0
  103. dsgrid/registry/duckdb_data_store.py +185 -0
  104. dsgrid/registry/filesystem_data_store.py +141 -0
  105. dsgrid/registry/filter_registry_manager.py +123 -0
  106. dsgrid/registry/project_config_generator.py +57 -0
  107. dsgrid/registry/project_registry_manager.py +1616 -0
  108. dsgrid/registry/project_update_checker.py +48 -0
  109. dsgrid/registry/registration_context.py +223 -0
  110. dsgrid/registry/registry_auto_updater.py +316 -0
  111. dsgrid/registry/registry_database.py +662 -0
  112. dsgrid/registry/registry_interface.py +446 -0
  113. dsgrid/registry/registry_manager.py +544 -0
  114. dsgrid/registry/registry_manager_base.py +367 -0
  115. dsgrid/registry/versioning.py +92 -0
  116. dsgrid/spark/__init__.py +0 -0
  117. dsgrid/spark/functions.py +545 -0
  118. dsgrid/spark/types.py +50 -0
  119. dsgrid/tests/__init__.py +0 -0
  120. dsgrid/tests/common.py +139 -0
  121. dsgrid/tests/make_us_data_registry.py +204 -0
  122. dsgrid/tests/register_derived_datasets.py +103 -0
  123. dsgrid/tests/utils.py +25 -0
  124. dsgrid/time/__init__.py +0 -0
  125. dsgrid/time/time_conversions.py +80 -0
  126. dsgrid/time/types.py +67 -0
  127. dsgrid/units/__init__.py +0 -0
  128. dsgrid/units/constants.py +113 -0
  129. dsgrid/units/convert.py +71 -0
  130. dsgrid/units/energy.py +145 -0
  131. dsgrid/units/power.py +87 -0
  132. dsgrid/utils/__init__.py +0 -0
  133. dsgrid/utils/dataset.py +612 -0
  134. dsgrid/utils/files.py +179 -0
  135. dsgrid/utils/filters.py +125 -0
  136. dsgrid/utils/id_remappings.py +100 -0
  137. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  138. dsgrid/utils/py_expression_eval/README.md +8 -0
  139. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  140. dsgrid/utils/py_expression_eval/tests.py +283 -0
  141. dsgrid/utils/run_command.py +70 -0
  142. dsgrid/utils/scratch_dir_context.py +64 -0
  143. dsgrid/utils/spark.py +918 -0
  144. dsgrid/utils/spark_partition.py +98 -0
  145. dsgrid/utils/timing.py +239 -0
  146. dsgrid/utils/utilities.py +184 -0
  147. dsgrid/utils/versioning.py +36 -0
  148. dsgrid_toolkit-0.2.0.dist-info/METADATA +216 -0
  149. dsgrid_toolkit-0.2.0.dist-info/RECORD +152 -0
  150. dsgrid_toolkit-0.2.0.dist-info/WHEEL +4 -0
  151. dsgrid_toolkit-0.2.0.dist-info/entry_points.txt +4 -0
  152. dsgrid_toolkit-0.2.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,444 @@
1
+ import copy
2
+ import getpass
3
+ import logging
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from IPython.display import display, HTML
9
+ import ipywidgets as widgets
10
+
11
+ from dsgrid.common import REMOTE_REGISTRY, LOCAL_REGISTRY
12
+ from dsgrid.exceptions import DSGBaseException
13
+ from dsgrid.registry.registry_database import DatabaseConnection
14
+ from dsgrid.registry.registry_manager import RegistryManager
15
+ from dsgrid.loggers import setup_logging
16
+ from dsgrid.spark.types import SparkSession
17
+ from dsgrid.utils.spark import init_spark
18
+
19
# Example config files that are linked from the GUI next to the corresponding text boxes.
SS_PROJECT = "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project/project.json5"
RS_DATASET = "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project/datasets/modeled/resstock/dataset.json5"

logger = logging.getLogger(__name__)


class RegistrationGui:
    """Provides a UI for registering dsgrid projects and datasets.

    The widgets are built and displayed as soon as an instance is constructed. Most actions
    stay disabled until a registry has been loaded with the "Load registry" button.
    """

    # Initial widget values. Note that the environment variables are read once, when the class
    # body is evaluated, not each time an instance is created.
    DEFAULTS = {
        "remote_registry": REMOTE_REGISTRY,
        "local_registry": LOCAL_REGISTRY,
        "project_file": "",
        "dataset_file": "",
        "dataset_path": "",
        "dimension_mapping_file": "",
        "dimensions_filter": "",
        "log_file": Path(os.environ.get("DSGRID_LOG_FILE_PATH", ".")) / "dsgrid.log",
        "log_message": "",
        "spark_cluster": os.environ.get("SPARK_CLUSTER", "local mode"),
    }

    def __init__(self, defaults=None):
        """Build and display the GUI.

        Parameters
        ----------
        defaults : dict | None
            Optional overrides for entries in ``DEFAULTS``. The class-level dict is deep-copied
            first, so overrides never leak into other instances.
        """
        self._manager = None
        self._defaults = copy.deepcopy(self.DEFAULTS)
        if defaults is not None:
            self._defaults.update(defaults)
        # The leading empty string is the "no project selected" entry of the dropdowns.
        self._project_ids = [""]
        self._make_widgets()
        self._display_widgets()
        self._tables_out = widgets.Output()

    @property
    def manager(self):
        """Return the loaded registry manager, or None if no registry has been loaded."""
        return self._manager

    @property
    def dimension_manager(self):
        """Return the dimension manager of the loaded registry."""
        return self._manager.dimension_manager

    @property
    def dimension_mapping_manager(self):
        """Return the dimension mapping manager of the loaded registry."""
        return self._manager.dimension_mapping_manager

    @property
    def dataset_manager(self):
        """Return the dataset manager of the loaded registry."""
        return self._manager.dataset_manager

    @property
    def project_manager(self):
        """Return the project manager of the loaded registry."""
        return self._manager.project_manager

    @staticmethod
    def _path_from_text(text):
        """Return a Path for a non-blank text value, or None if the text is blank.

        ``Path("")`` stringifies to ``"."``, so emptiness has to be checked on the raw text
        before it is converted to a Path.
        """
        stripped = text.strip()
        return Path(stripped) if stripped else None

    def _make_widgets(self):
        """Create all widgets and connect their event handlers."""
        self._main_label = widgets.HTML("<b>dsgrid Registration Tool</b>")
        text_layout = widgets.Layout(width="400px")
        button_layout = widgets.Layout(width="200px")
        self._remote_path_text = widgets.Text(
            str(self._defaults["remote_registry"]),
            description="Remote registry",
            layout=text_layout,
        )
        self._local_path_text = widgets.Text(
            str(self._defaults["local_registry"]),
            description="Local registry",
            layout=text_layout,
        )
        self._spark_cluster_text = widgets.Text(
            self._defaults["spark_cluster"],
            description="Spark cluster",
            layout=text_layout,
        )
        log_file = self._defaults["log_file"]
        # TODO: setup detection of changes to this text box and reconfigure logging
        self._log_file_text = widgets.Text(
            str(log_file),
            description="Log file",
            layout=text_layout,
        )
        self._online_mode_cbox = widgets.Checkbox(
            value=False,
            description="Online mode",
        )
        self._online_mode_cbox.observe(self._on_online_click, names="value")
        self._sync_cbox = widgets.Checkbox(
            value=True,
            description="Sync pull",
        )
        self._load_btn = widgets.Button(description="Load registry", layout=button_layout)
        self._load_btn.on_click(self._on_load_click)
        self._register_project_btn = widgets.Button(
            description="Register project", disabled=True, layout=button_layout
        )
        self._register_project_btn.on_click(self._on_register_project_click)
        self._project_file_text = widgets.Text(
            str(self._defaults["project_file"]),
            description="Project File",
            placeholder="project.json5",
        )
        self._project_file_ex = widgets.HTML(
            f"<a href={SS_PROJECT} target='_blank'>Example: Standard Scenarios</a>"
        )
        self._register_and_submit_dataset_btn = widgets.Button(
            description="Register and submit dataset", disabled=True, layout=button_layout
        )
        self._register_and_submit_dataset_btn.on_click(self._on_register_and_submit_dataset_click)
        self._dataset_file_ex = widgets.HTML(
            f"<a href={RS_DATASET} target='_blank'>Example: ResStock</a>"
        )
        self._dataset_file_text = widgets.Text(
            str(self._defaults["dataset_file"]),
            description="Dataset File",
            placeholder="dataset.json5",
        )
        self._dataset_path_text = widgets.Text(
            self._defaults["dataset_path"],
            description="Dataset Path",
            placeholder="load_data_path",
        )
        self._dimension_mapping_label = widgets.HTML("Dimension mapping file")
        self._dimension_mapping_text = widgets.Text(
            str(self._defaults["dimension_mapping_file"]), placeholder="dimension_mappings.json5"
        )
        self._dataset_project_id_dd = widgets.Dropdown(
            description="Project ID",
            options=self._project_ids,
            value=self._project_ids[0],
            disabled=True,
        )
        self._log_message_label = widgets.HTML("Registration log message")
        self._log_message_text = widgets.Text(
            self._defaults["log_message"], layout=widgets.Layout(width="400px")
        )
        self._show_projects_btn = widgets.Button(
            disabled=True,
            description="Show projects",
            tooltip="Display a table showing all registered projects",
        )
        self._show_projects_btn.on_click(self._on_show_projects_click)
        self._show_datasets_btn = widgets.Button(
            disabled=True,
            description="Show datasets",
            tooltip="Display a table showing all registered datasets",
        )
        self._show_datasets_btn.on_click(self._on_show_datasets_click)
        self._show_dimensions_btn = widgets.Button(
            disabled=True,
            description="Show dimensions",
            tooltip="Display a table showing all registered dimensions",
        )
        self._show_dimensions_btn.on_click(self._on_show_dimensions_click)
        self._dim_filter_message_text = widgets.HTML("Filter dimensions")
        self._dimensions_filter_text = widgets.Text(
            self._defaults["dimensions_filter"], placeholder="Type == geography"
        )
        self._project_dimensions_filter_text = widgets.HTML("Filter dimensions by project")
        self._project_dimensions_filter_dd = widgets.Dropdown(
            options=self._project_ids,
            value=self._project_ids[0],
            disabled=True,
        )
        self._show_dimension_mappings_btn = widgets.Button(
            disabled=True,
            description="Show mappings",
            tooltip="Display a table showing all registered dimension mappings",
        )
        self._show_dimension_mappings_btn.on_click(self._on_show_dimension_mappings_click)
        self._reset_tables_btn = widgets.Button(description="Reset tables")
        self._reset_tables_btn.on_click(self._reset_tables_click)
        self._reset_btn = widgets.Button(description="Reset all")
        self._reset_btn.on_click(self._on_reset_click)

        # Disabling because these tables are not well-formed.
        # self._project_table = widgets.HTML(value="", description="Projects")
        # self._dataset_table = widgets.HTML(value="", description="Datasets")
        # self._dimension_table = widgets.HTML(value="", description="Dimensions")
        # self._dimension_mapping_table = widgets.HTML(value="", description="Dimension Mappings")

    def _display_widgets(self):
        """Arrange the widgets into boxes and display them."""
        registry_box = widgets.VBox(
            (
                self._remote_path_text,
                self._local_path_text,
                self._spark_cluster_text,
                self._log_file_text,
            )
        )
        options_box = widgets.VBox((self._online_mode_cbox, self._sync_cbox))

        register_project_box = widgets.HBox(
            (self._register_project_btn, self._project_file_text, self._project_file_ex)
        )
        register_and_submit_dataset_box = widgets.HBox(
            (
                self._register_and_submit_dataset_btn,
                widgets.VBox(
                    (
                        widgets.HBox((self._dataset_file_text, self._dataset_file_ex)),
                        self._dataset_path_text,
                        widgets.HBox(
                            (self._dimension_mapping_label, self._dimension_mapping_text)
                        ),
                        self._dataset_project_id_dd,
                    ),
                ),
            ),
        )
        log_box = widgets.HBox((self._log_message_label, self._log_message_text))
        register_box = widgets.VBox(
            (register_project_box, register_and_submit_dataset_box, log_box)
        )

        show_dims_box = widgets.HBox(
            (
                self._show_dimensions_btn,
                self._dim_filter_message_text,
                self._dimensions_filter_text,
                self._project_dimensions_filter_text,
                self._project_dimensions_filter_dd,
            )
        )
        show_box = widgets.VBox(
            (
                self._show_projects_btn,
                self._show_datasets_btn,
                show_dims_box,
                self._show_dimension_mappings_btn,
                # self._project_table,
                # self._dataset_table,
                # self._dimension_table,
                # self._dimension_mapping_table,
                self._reset_tables_btn,
            )
        )

        display(
            self._main_label,
            widgets.HBox((registry_box, options_box)),
            self._load_btn,
            register_box,
            show_box,
            self._reset_btn,
        )

    def _enable_manager_actions(self):
        """Enable the widgets that need a loaded registry and refresh the project IDs.

        Requires ``self._manager`` to be set.
        """
        self._register_project_btn.disabled = False
        self._register_and_submit_dataset_btn.disabled = False
        self._dataset_project_id_dd.disabled = False
        self._show_projects_btn.disabled = False
        self._show_datasets_btn.disabled = False
        self._show_dimensions_btn.disabled = False
        self._show_dimension_mappings_btn.disabled = False
        self._project_dimensions_filter_dd.disabled = False
        self._update_project_ids()
        # Run the show handlers once inside a throwaway Output widget that is cleared right
        # after. NOTE(review): presumably a warm-up whose output should stay hidden - confirm.
        out = widgets.Output()
        with out:
            self._on_show_projects_click(self._show_projects_btn)
            self._on_show_datasets_click(self._show_datasets_btn)
        out.clear_output()

    def _on_online_click(self, _):
        """Force "Sync pull" on, and lock it, while online mode is checked."""
        # Syncing is always enabled when in online mode.
        if self._online_mode_cbox.value:
            self._sync_cbox.value = True
        self._sync_cbox.disabled = self._online_mode_cbox.value

    def _on_load_click(self, _):
        """Configure logging, optionally start Spark, and load the registry manager."""
        # TODO: We should log to an Output widget that gets updated periodically.
        logger = setup_logging(__name__, self._log_file_text.value, mode="a")
        if (
            self._spark_cluster_text.value not in ("local mode", "")
            and SparkSession.getActiveSession() is None
        ):
            os.environ["SPARK_CLUSTER"] = self._spark_cluster_text.value
            # Capture whatever init_spark displays in a throwaway widget and discard it.
            out = widgets.Output()
            with out:
                init_spark()
            out.clear_output()

        sync = self._sync_cbox.value
        online = self._online_mode_cbox.value
        conn = DatabaseConnection()
        try:
            if sync and not online:
                # This exists only to sync data locally.
                RegistryManager.load(
                    conn,
                    remote_path=self._remote_path_text.value,
                    offline_mode=False,
                    user=getpass.getuser(),
                )
            self._manager = RegistryManager.load(
                conn,
                remote_path=self._remote_path_text.value,
                offline_mode=not online,
                user=getpass.getuser(),
            )
        except DSGBaseException:
            logger.exception("Failed to load registry %s", self._local_path_text.value)
            return

        self._enable_manager_actions()

    def _update_project_ids(self):
        """Reload the project IDs from the registry into both project dropdowns."""
        # Keep the leading "" entry and replace everything after it.
        self._project_ids[1:] = self._manager.project_manager.list_ids()
        self._project_dimensions_filter_dd.options = self._project_ids
        self._project_dimensions_filter_dd.value = self._project_ids[0]
        self._dataset_project_id_dd.options = self._project_ids
        self._dataset_project_id_dd.value = self._project_ids[0]

    def _on_register_project_click(self, _):
        """Register the project config file named in the "Project File" text box."""
        project_file = self._path_from_text(self._project_file_text.value)
        if project_file is None:
            print("project_file cannot be empty", file=sys.stderr)
            return
        if not self._registration_pre_check():
            return
        try:
            self._manager.project_manager.register(
                project_file, submitter=getpass.getuser(), log_message=self._log_message_text.value
            )
        except DSGBaseException:
            logger.exception("Failed to register project %s", project_file)
            return

        self._update_project_ids()
        self._post_registration_handling()

    def _on_register_and_submit_dataset_click(self, _):
        """Register the dataset described in the text boxes and submit it to the project."""
        dataset_file = self._path_from_text(self._dataset_file_text.value)
        if dataset_file is None:
            print("dataset_file cannot be empty", file=sys.stderr)
            return
        dataset_path = self._path_from_text(self._dataset_path_text.value)
        if dataset_path is None:
            print("dataset_path cannot be empty", file=sys.stderr)
            return
        # The mapping file is optional: a blank text box is passed on as None.
        dimension_mapping_file = self._path_from_text(self._dimension_mapping_text.value)
        project_id = self._dataset_project_id_dd.value
        if project_id == "":
            print("project_id cannot be empty", file=sys.stderr)
            return
        if not self._registration_pre_check():
            return
        try:
            self._manager.project_manager.register_and_submit_dataset(
                dataset_file,
                dataset_path,
                project_id,
                dimension_mapping_file=dimension_mapping_file,
                submitter=getpass.getuser(),
                log_message=self._log_message_text.value,
            )
        except DSGBaseException:
            logger.exception("Failed to register and submit dataset %s", dataset_file)
            return

        self._post_registration_handling()
        self._update_project_ids()

    def _registration_pre_check(self):
        """Return True if a registration log message has been entered, else False."""
        log_message = self._log_message_text.value
        if log_message == "":
            print("log_message cannot be empty", file=sys.stderr)
            return False
        return True

    def _post_registration_handling(self):
        """Clear the log message so that the next registration needs a new one."""
        self._log_message_text.value = ""

    def _on_show_projects_click(self, _):
        """Display a table of the registered projects."""
        table = self._manager.project_manager.show(return_table=True)
        # self._project_table.value = table.get_html_string()
        self._display_table("Projects", table)

    def _on_show_datasets_click(self, _):
        """Display a table of the registered datasets."""
        table = self._manager.dataset_manager.show(return_table=True)
        # self._dataset_table.value = table.get_html_string()
        self._display_table("Datasets", table)

    def _on_show_dimensions_click(self, _):
        """Display a table of the registered dimensions, applying the optional filters."""
        filters = [self._dimensions_filter_text.value]
        if filters == [""]:
            filters = None
        project_id = self._project_dimensions_filter_dd.value
        if project_id == "":
            dimension_ids = None
        else:
            # Restrict the table to the selected project's base and supplemental dimensions.
            project_config = self._manager.project_manager.get_by_id(project_id)
            dimension_ids = {x.id for x in project_config.base_dimensions}
            for key in project_config.supplemental_dimensions:
                dimension_ids.add(key.id)

        table = self._manager.dimension_manager.show(
            filters=filters, dimension_ids=dimension_ids, return_table=True
        )
        self._display_table("Dimensions", table)

    def _display_table(self, name, table):
        """Replace the currently displayed table with ``table`` under the heading ``name``.

        ``table`` must provide ``get_html_string()``.
        """
        self._tables_out.clear_output()
        self._tables_out = widgets.Output()
        with self._tables_out:
            display(HTML(f"<b>{name}</b>"))
            display(HTML(table.get_html_string()))
        display(self._tables_out)

    def _on_show_dimension_mappings_click(self, _):
        """Display a table of the registered dimension mappings."""
        table = self._manager.dimension_mapping_manager.show(return_table=True)
        # self._dimension_mapping_table.value = table.get_html_string()
        self._display_table("Dimension Mappings", table)

    def _reset_tables_click(self, _):
        """Clear the currently displayed table."""
        # self._project_table.value = ""
        # self._dataset_table.value = ""
        # self._dimension_table.value = ""
        # self._dimension_mapping_table.value = ""
        self._tables_out.clear_output()

    def _on_reset_click(self, _):
        """Close the existing widgets, then rebuild and redisplay the GUI."""
        # NOTE(review): close_all() looks like a class-wide operation in ipywidgets, which would
        # also close widgets that do not belong to this GUI - confirm that this is intended.
        for val in self.__dict__.values():
            if isinstance(val, widgets.Widget):
                val.close_all()
        self._make_widgets()
        self._display_widgets()
        # Only re-enable the registry actions if a registry has been loaded. Without a manager
        # they would be enabled and the project ID refresh would fail on None.
        if self._manager is not None:
            self._enable_manager_actions()
dsgrid/chronify.py ADDED
@@ -0,0 +1,22 @@
1
+ from contextlib import contextmanager
2
+ from pathlib import Path
3
+ from typing import Generator
4
+
5
+ import chronify
6
+
7
+ import dsgrid
8
+ from dsgrid.common import BackendEngine
9
+
10
+
11
@contextmanager
def create_store(store_file: Path) -> Generator[chronify.Store, None, None]:
    """Yield a chronify Store chosen by the dsgrid runtime configuration.

    When the configured backend engine is Spark, a new Hive store is created from the
    configured thrift server URL and ``store_file`` is not used. Otherwise a file-based
    database is created at ``store_file``. The store is disposed when the context exits,
    including when the body raises.
    """
    runtime = dsgrid.runtime_config
    use_spark = runtime.backend_engine == BackendEngine.SPARK
    store = (
        chronify.Store.create_new_hive_store(runtime.thrift_server_url)
        if use_spark
        else chronify.Store.create_file_db(store_file)
    )
    try:
        yield store
    finally:
        store.dispose()
dsgrid/cli/__init__.py ADDED
File without changes
dsgrid/cli/common.py ADDED
@@ -0,0 +1,120 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Any
5
+
6
+ import rich_click as click
7
+
8
+ from dsgrid.dsgrid_rc import DsgridRuntimeConfig
9
+ from dsgrid.exceptions import DSGBaseException
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def check_output_directory(path: Path, fs_interface, force: bool):
    """Ensures that the parameter path is an empty directory.

    An existing empty directory is left untouched. An existing path with content is removed
    and recreated when force is True. A path that does not exist is created.

    Parameters
    ----------
    path : Path
    fs_interface : FilesystemInterface
        Used to remove the existing path via ``rm_tree`` when force is True.
    force : bool
        If False and the directory exists and has content, exit.
    """
    if path.exists():
        # Path.iterdir() returns an iterator object, which is always truthy, so probe for a
        # first entry with any(). The is_dir() guard keeps a non-directory path on the
        # remove-or-exit branch below instead of failing inside iterdir().
        if path.is_dir() and not any(path.iterdir()):
            return
        if force:
            fs_interface.rm_tree(path)
        else:
            print(
                f"{path} already exists. Choose a different name or pass --force to overwrite it.",
                file=sys.stderr,
            )
            sys.exit(1)

    path.mkdir()
38
+
39
+
40
def get_log_level_from_str(level):
    """Translate a lowercase level name into the matching ``logging`` level constant.

    Supported names are "debug", "info", "warning", and "error". Any other value raises
    Exception.
    """
    known_levels = (
        ("debug", logging.DEBUG),
        ("info", logging.INFO),
        ("warning", logging.WARNING),
        ("error", logging.ERROR),
    )
    for name, value in known_levels:
        if level == name:
            return value
    msg = f"Unsupported level={level}"
    raise Exception(msg)
54
+
55
+
56
def get_value_from_context(ctx, field) -> Any:
    """Look up ``field`` in the params of the root context of ``ctx``.

    Raises KeyError if the root context has no such parameter.
    """
    root_ctx = ctx.find_root()
    root_params = root_ctx.params
    return root_params[field]
59
+
60
+
61
def handle_dsgrid_exception(ctx, func, *args, **kwargs) -> tuple[Any, int]:
    """Handle any dsgrid exceptions as specified by the CLI parameters.

    Calls ``func(*args, **kwargs)``. Only DSGBaseException is intercepted; any other
    exception propagates unchanged.

    Parameters
    ----------
    ctx
        Context whose root params contain the ``reraise_exceptions`` flag.
    func
        Callable to invoke.

    Returns
    -------
    tuple[Any, int]
        ``(result, 0)`` on success, or ``(None, 1)`` if a DSGBaseException was logged and
        not re-raised.

    Raises
    ------
    DSGBaseException
        Re-raised after logging if ``reraise_exceptions`` is set on the root context.
    """
    res = None
    try:
        res = func(*args, **kwargs)
        return res, 0
    except DSGBaseException:
        exc_type, exc_value, exc_tb = sys.exc_info()
        # The head of the traceback is this function's own frame (the func() call above).
        # Walk to the innermost entry so that the logged location is where the exception was
        # actually raised.
        while exc_tb.tb_next is not None:
            exc_tb = exc_tb.tb_next
        filename = exc_tb.tb_frame.f_code.co_filename
        line = exc_tb.tb_lineno
        msg = f'{func.__name__} failed: exception={exc_type.__name__} message="{exc_value}" {filename=} {line=}'
        logger.error(msg)
        if ctx.find_root().params["reraise_exceptions"]:
            raise
        return res, 1
76
+
77
+
78
def handle_scratch_dir(*args):
    """Handle the user input for scratch_dir. If a path is passed, ensure it exists.

    Only the third positional argument is read; it holds the option value.
    NOTE(review): presumably called as a click callback with (ctx, param, value) - confirm.

    Returns
    -------
    Path | None
        The value as a Path, or None if no value was given.

    Raises
    ------
    ValueError
        Raised if the given path does not exist.
    """
    val = args[2]
    if val is None:
        return val
    path = Path(val)
    # exists must be called: the bare bound method is always truthy, which would skip the check.
    if not path.exists():
        msg = f"scratch-dir={path} does not exist"
        raise ValueError(msg)
    return path
88
+
89
+
90
+ def path_callback(*args) -> Path | None:
91
+ """Ensure that a Path CLI option value is returned as a Path object."""
92
+ val = args[2]
93
+ if val is None:
94
+ return val
95
+ return Path(val)
96
+
97
+
98
# Copied from
# https://stackoverflow.com/questions/45868549/creating-a-click-option-with-prompt-that-shows-only-if-default-value-is-empty
# and modified for our desired password behavior.


class OptionPromptPassword(click.Option):
    """Password option that prompts only when the username given on the command line differs
    from the one stored in the runtime config file."""

    def get_default(self, ctx, **kwargs):
        """Return the stored password if the username matches the runtime config, else None.

        The runtime config is loaded on every call.
        """
        runtime_config = DsgridRuntimeConfig.load()
        requested_user = ctx.find_root().params.get("username")
        if requested_user == runtime_config.database_user:
            return runtime_config.database_password
        return None

    def prompt_for_value(self, ctx):
        """Return the stored password when one applies; otherwise prompt the user."""
        stored_password = self.get_default(ctx)

        if stored_password is not None:
            return stored_password

        return super().prompt_for_value(ctx)