dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,216 @@
1
+ import pandas as pd
2
+ import requests
3
+ from dash import Dash, dash_table, dcc, html, Input, Output, State
4
+ import dash_bootstrap_components as dbc
5
+
6
+ from dash.exceptions import PreventUpdate
7
+
8
+ from dsgrid.api.response_models import ListProjectsResponse
9
+
10
+
11
# Default address of the dsgrid API server that the viewer connects to.
DSGRID_API_URL = "http://127.0.0.1:8000"

# Copied DataTable styles from https://gist.github.com/marcogoldin/8fc4c3945cef17ca38d55c4e17ebbbe6
# The font stack is kept as a list and joined so that each family is easy to read and edit.
_FONT_STACK = ",".join(
    [
        "-apple-system",
        "BlinkMacSystemFont",
        '"Segoe UI"',
        "Roboto",
        '"Helvetica Neue"',
        "Arial",
        '"Noto Sans"',
        "sans-serif",
        '"Apple Color Emoji"',
        '"Segoe UI Emoji"',
        '"Segoe UI Symbol"',
        '"Noto Color Emoji"',
    ]
)

# Style applied to the table as a whole.
STYLE_TABLE = {"fontFamily": _FONT_STACK}

# Style applied to the header row.
STYLE_HEADER = dict(backgroundColor="white", fontWeight="bold", padding="0.75rem")

# Style applied to every cell; shares the table's font stack.
STYLE_CELL = dict(
    fontFamily=STYLE_TABLE["fontFamily"],
    fontWeight="400",
    lineHeight="1.5",
    color="#212529",
    textAlign="left",
    whiteSpace="normal",
    height="auto",
    padding="0.75rem",
    border="1px solid #dee2e6",
    verticalAlign="top",
)

# Shade odd-numbered rows so that the table is striped.
STYLE_DATA_CONDITIONAL = [
    {
        "if": {"row_index": "odd"},
        "backgroundColor": "#f8f9fa",
    }
]
33
+
34
# The Dash application object. Callbacks below are registered against it.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# Page layout, top to bottom: connection controls, project selector, dimensions table,
# and the records table for the selected dimension.
app.layout = dbc.Container(
    [
        dbc.Alert("dsgrid Project Viewer", color="info"),
        html.Div(
            [
                "URL: ",
                dcc.Input(
                    id="url_text",
                    value=DSGRID_API_URL,
                ),
                # Dash style dictionaries use camelCased CSS property names
                # (marginLeft, not margin-left), matching STYLE_CELL above.
                html.Label("Status:", style={"marginLeft": "15px", "marginRight": "15px"}),
                dcc.Input(
                    id="status_text",
                    value="disconnected",
                ),
                dbc.Button(
                    "Connect",
                    outline=True,
                    color="primary",
                    id="connect_button",
                    n_clicks=0,
                    style={"marginLeft": "15px"},
                ),
            ]
        ),
        html.Br(),
        html.Div(
            [
                "Select a project",
                dcc.Dropdown([], "", id="project_dd"),
            ],
        ),
        html.Br(),
        dbc.Button(
            "List dimensions",
            outline=True,
            color="primary",
            id="list_dimensions_button",
            n_clicks=0,
        ),
        html.Div(
            [
                # Empty placeholder table; the populated table is rendered into the
                # container below by on_list_dimensions.
                dash_table.DataTable([], [], id="dimensions_table"),
                html.Div(id="dimensions_table_container"),
            ],
        ),
        html.Br(),
        html.H5("Dimension Records"),
        dcc.Input(
            id="dimension_name",
            value="",
            readOnly=True,
        ),
        html.Div(
            [
                # Empty placeholder table; the populated table is rendered into the
                # container below by on_list_dimension_records.
                dash_table.DataTable([], [], id="dimension_records_table"),
                html.Div(id="dimension_records_table_container"),
            ]
        ),
    ],
    className="m-5",
    # fluid=True,
    # Per the docs, this should be set to True. However, that causes a display issue where the
    # right part of the GUI is hidden and so you have to scroll horizontally to see it.
    # When passing the allowed parameters, like lg, xl, xxl, the width is too small.
    # Passing a non-supported string makes it work. This will almost certainly break in the future.
    fluid="invalidparameter",
)
103
+
104
+
105
@app.callback(
    Output("project_dd", "options"),
    Output("status_text", "value"),
    Input("connect_button", "n_clicks"),
    State("url_text", "value"),
)
def on_connect(n_clicks, url):
    """Fetch the project IDs from the API server and report the connection status.

    Parameters
    ----------
    n_clicks : int | None
        Click count of the Connect button.
    url : str
        Base URL of the dsgrid API server, read from the URL text box.

    Returns
    -------
    tuple[list, str]
        The options for the project dropdown and the text for the status field.

    Raises
    ------
    PreventUpdate
        If the button has no click count.
    """
    if n_clicks is None:
        raise PreventUpdate
    try:
        project_ids = list_project_ids(url)
    except Exception as exc:
        # This is the UI boundary: surface the failure in the status field instead of
        # leaving a stale "connected" status and project list on screen.
        return [], f"connection failed: {exc}"
    return project_ids, "connected"
115
+
116
+
117
@app.callback(
    Output("project_dd", "value"),
    Input("project_dd", "options"),
)
def on_project_options_change(options):
    """Select the first available project, or an empty string when there are none."""
    return options[0] if options else ""
125
+
126
+
127
@app.callback(
    Output("dimensions_table_container", "children"),
    Input("list_dimensions_button", "n_clicks"),
    State("project_dd", "value"),
    State("url_text", "value"),
)
def on_list_dimensions(n_clicks, project_id, url):
    """Build the table of dimensions for the selected project.

    Parameters
    ----------
    n_clicks : int | None
        Click count of the "List dimensions" button.
    project_id : str | None
        Currently selected project ID.
    url : str
        Base URL of the dsgrid API server.

    Returns
    -------
    dash_table.DataTable
        Filterable, sortable table with single-row selection.

    Raises
    ------
    PreventUpdate
        If the button has no click count or no project is selected.
    """
    # A cleared dropdown reports None rather than "", so test for falsiness.
    if n_clicks is None or not project_id:
        raise PreventUpdate

    table = list_project_dimensions(project_id, url)
    # An empty dimension list has no first row to derive the column names from.
    columns = [{"name": x, "id": x} for x in table[0]] if table else []
    return dash_table.DataTable(
        table,
        columns,
        id="dimensions_table",
        editable=False,
        filter_action="native",
        sort_action="native",
        row_selectable="single",
        selected_rows=[],
        style_table=STYLE_TABLE,
        style_header=STYLE_HEADER,
        style_cell=STYLE_CELL,
        style_data_conditional=STYLE_DATA_CONDITIONAL,
    )
151
+
152
+
153
@app.callback(
    Output("dimension_records_table_container", "children"),
    Output("dimension_name", "value"),
    Input("dimensions_table", "derived_viewport_selected_rows"),
    Input("dimensions_table", "derived_viewport_data"),
    State("url_text", "value"),
)
def on_list_dimension_records(row_indexes, row_data, url):
    """Build the records table for the dimension selected in the dimensions table.

    Parameters
    ----------
    row_indexes : list[int] | None
        Indexes of the selected rows within the visible table data.
    row_data : list[dict]
        Rows currently visible in the dimensions table.
    url : str
        Base URL of the dsgrid API server.

    Returns
    -------
    tuple
        The records table and the name of the selected dimension.

    Raises
    ------
    PreventUpdate
        If no row is selected or the dimension has no records.
    """
    if not row_indexes:
        raise PreventUpdate

    selected_row = row_data[row_indexes[0]]
    records = list_dimension_records(selected_row["dimension_id"], url)
    if not records:
        raise PreventUpdate

    # Label every column header with the number of distinct values in that column.
    frame = pd.DataFrame.from_records(records)
    columns = [
        {"name": f"{field} ({frame[field].nunique()} unique)", "id": field}
        for field in records[0].keys()
    ]

    records_table = dash_table.DataTable(
        records,
        columns,
        id="dimension_records_table",
        editable=False,
        filter_action="native",
        sort_action="native",
        style_table=STYLE_TABLE,
        style_header=STYLE_HEADER,
        style_cell=STYLE_CELL,
        style_data_conditional=STYLE_DATA_CONDITIONAL,
    )
    return records_table, selected_row["name"]
191
+
192
+
193
def list_project_ids(url):
    """Return the project IDs reported by the API server's "projects" endpoint."""
    payload = check_request("projects", url)
    parsed = ListProjectsResponse(**payload)
    return [project.project_id for project in parsed.projects]
196
+
197
+
198
def list_project_dimensions(project_id, url):
    """Return the "dimensions" entry of the API server's response for a project."""
    endpoint = f"projects/{project_id}/dimensions"
    payload = check_request(endpoint, url)
    return payload["dimensions"]
200
+
201
+
202
def list_dimension_records(dimension_id, url):
    """Return the "records" entry of the API server's response for a dimension."""
    endpoint = f"dimensions/records/{dimension_id}"
    payload = check_request(endpoint, url)
    return payload["records"]
204
+
205
+
206
def check_request(endpoint, url, timeout=60):
    """Send a GET request to the API server and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Path relative to the server's base URL, without a leading slash.
    url : str
        Base URL of the dsgrid API server. A trailing slash is tolerated.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout, requests
        waits indefinitely and an unresponsive server would hang the UI callback.

    Returns
    -------
    Decoded JSON content of the response.

    Raises
    ------
    RuntimeError
        If the server responds with a status code other than 200.
    """
    # Avoid a double slash when the user enters the base URL with a trailing "/".
    target = f"{url.rstrip('/')}/{endpoint}"
    response = requests.get(target, timeout=timeout)
    if response.status_code != 200:
        msg = f"request to {target} failed: {response.status_code}"
        raise RuntimeError(msg)
    return response.json()
213
+
214
+
215
def _main():
    """Start the viewer with Dash's built-in server in debug mode."""
    app.run(debug=True)


if __name__ == "__main__":
    _main()
@@ -0,0 +1,444 @@
1
+ import copy
2
+ import getpass
3
+ import logging
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ from IPython.display import display, HTML
9
+ import ipywidgets as widgets
10
+
11
+ from dsgrid.common import REMOTE_REGISTRY, LOCAL_REGISTRY
12
+ from dsgrid.exceptions import DSGBaseException
13
+ from dsgrid.registry.registry_database import DatabaseConnection
14
+ from dsgrid.registry.registry_manager import RegistryManager
15
+ from dsgrid.loggers import setup_logging
16
+ from dsgrid.spark.types import SparkSession
17
+ from dsgrid.utils.spark import init_spark
18
+
19
# Root of the example project in the StandardScenarios repository.
_SS_PROJECT_ROOT = (
    "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project"
)

# Example files linked from the GUI next to the project and dataset file inputs.
SS_PROJECT = f"{_SS_PROJECT_ROOT}/project.json5"
RS_DATASET = f"{_SS_PROJECT_ROOT}/datasets/modeled/resstock/dataset.json5"

logger = logging.getLogger(__name__)
23
+
24
+
25
class RegistrationGui:
    """Provides a UI for registering dsgrid projects and datasets.

    The widgets are created and displayed as soon as an instance is constructed.
    Buttons that need a registry stay disabled until "Load registry" succeeds.

    Parameters
    ----------
    defaults : dict | None
        Optional overrides for the entries in ``DEFAULTS``, which supply the initial
        values of the input widgets.
    """

    DEFAULTS = {
        "remote_registry": REMOTE_REGISTRY,
        "local_registry": LOCAL_REGISTRY,
        "project_file": "",
        "dataset_file": "",
        "dataset_path": "",
        "dimension_mapping_file": "",
        "dimensions_filter": "",
        "log_file": Path(os.environ.get("DSGRID_LOG_FILE_PATH", ".")) / "dsgrid.log",
        "log_message": "",
        "spark_cluster": os.environ.get("SPARK_CLUSTER", "local mode"),
    }

    def __init__(self, defaults=None):
        self._manager = None
        self._defaults = copy.deepcopy(self.DEFAULTS)
        if defaults is not None:
            self._defaults.update(defaults)
        # The leading empty string is the "no project selected" dropdown entry.
        self._project_ids = [""]
        # Create the table output before wiring any callbacks that refer to it.
        self._tables_out = widgets.Output()
        self._make_widgets()
        self._display_widgets()

    @property
    def manager(self):
        """Return the loaded registry manager, or None if no registry is loaded."""
        return self._manager

    @property
    def dimension_manager(self):
        """Return the dimension manager of the loaded registry."""
        return self._manager.dimension_manager

    @property
    def dimension_mapping_manager(self):
        """Return the dimension mapping manager of the loaded registry."""
        return self._manager.dimension_mapping_manager

    @property
    def dataset_manager(self):
        """Return the dataset manager of the loaded registry."""
        return self._manager.dataset_manager

    @property
    def project_manager(self):
        """Return the project manager of the loaded registry."""
        return self._manager.project_manager

    def _make_widgets(self):
        """Create every widget and connect its event handler."""
        self._main_label = widgets.HTML("<b>dsgrid Registration Tool</b>")
        text_layout = widgets.Layout(width="400px")
        button_layout = widgets.Layout(width="200px")

        # Registry and runtime settings.
        self._remote_path_text = widgets.Text(
            str(self._defaults["remote_registry"]),
            description="Remote registry",
            layout=text_layout,
        )
        self._local_path_text = widgets.Text(
            str(self._defaults["local_registry"]),
            description="Local registry",
            layout=text_layout,
        )
        self._spark_cluster_text = widgets.Text(
            self._defaults["spark_cluster"],
            description="Spark cluster",
            layout=text_layout,
        )
        log_file = self._defaults["log_file"]
        # TODO: setup detection of changes to this text box and reconfigure logging
        self._log_file_text = widgets.Text(
            str(log_file),
            description="Log file",
            layout=text_layout,
        )
        self._online_mode_cbox = widgets.Checkbox(
            value=False,
            description="Online mode",
        )
        self._online_mode_cbox.observe(self._on_online_click, names="value")
        self._sync_cbox = widgets.Checkbox(
            value=True,
            description="Sync pull",
        )
        self._load_btn = widgets.Button(description="Load registry", layout=button_layout)
        self._load_btn.on_click(self._on_load_click)

        # Project registration.
        self._register_project_btn = widgets.Button(
            description="Register project", disabled=True, layout=button_layout
        )
        self._register_project_btn.on_click(self._on_register_project_click)
        self._project_file_text = widgets.Text(
            str(self._defaults["project_file"]),
            description="Project File",
            placeholder="project.json5",
        )
        self._project_file_ex = widgets.HTML(
            f"<a href={SS_PROJECT} target='_blank'>Example: Standard Scenarios</a>"
        )

        # Dataset registration and submission.
        self._register_and_submit_dataset_btn = widgets.Button(
            description="Register and submit dataset", disabled=True, layout=button_layout
        )
        self._register_and_submit_dataset_btn.on_click(self._on_register_and_submit_dataset_click)
        self._dataset_file_ex = widgets.HTML(
            f"<a href={RS_DATASET} target='_blank'>Example: ResStock</a>"
        )
        self._dataset_file_text = widgets.Text(
            str(self._defaults["dataset_file"]),
            description="Dataset File",
            placeholder="dataset.json5",
        )
        # Coerce with str() like the other file inputs so that a Path default is accepted.
        self._dataset_path_text = widgets.Text(
            str(self._defaults["dataset_path"]),
            description="Dataset Path",
            placeholder="load_data_path",
        )
        self._dimension_mapping_label = widgets.HTML("Dimension mapping file")
        self._dimension_mapping_text = widgets.Text(
            str(self._defaults["dimension_mapping_file"]), placeholder="dimension_mappings.json5"
        )
        self._dataset_project_id_dd = widgets.Dropdown(
            description="Project ID",
            options=self._project_ids,
            value=self._project_ids[0],
            disabled=True,
        )
        self._log_message_label = widgets.HTML("Registration log message")
        self._log_message_text = widgets.Text(
            self._defaults["log_message"], layout=widgets.Layout(width="400px")
        )

        # Registry content tables.
        self._show_projects_btn = widgets.Button(
            disabled=True,
            description="Show projects",
            tooltip="Display a table showing all registered projects",
        )
        self._show_projects_btn.on_click(self._on_show_projects_click)
        self._show_datasets_btn = widgets.Button(
            disabled=True,
            description="Show datasets",
            tooltip="Display a table showing all registered datasets",
        )
        self._show_datasets_btn.on_click(self._on_show_datasets_click)
        self._show_dimensions_btn = widgets.Button(
            disabled=True,
            description="Show dimensions",
            tooltip="Display a table showing all registered dimensions",
        )
        self._show_dimensions_btn.on_click(self._on_show_dimensions_click)
        self._dim_filter_message_text = widgets.HTML("Filter dimensions")
        self._dimensions_filter_text = widgets.Text(
            self._defaults["dimensions_filter"], placeholder="Type == geography"
        )
        self._project_dimensions_filter_text = widgets.HTML("Filter dimensions by project")
        self._project_dimensions_filter_dd = widgets.Dropdown(
            options=self._project_ids,
            value=self._project_ids[0],
            disabled=True,
        )
        self._show_dimension_mappings_btn = widgets.Button(
            disabled=True,
            description="Show mappings",
            tooltip="Display a table showing all registered dimension mappings",
        )
        self._show_dimension_mappings_btn.on_click(self._on_show_dimension_mappings_click)
        self._reset_tables_btn = widgets.Button(description="Reset tables")
        self._reset_tables_btn.on_click(self._reset_tables_click)
        self._reset_btn = widgets.Button(description="Reset all")
        self._reset_btn.on_click(self._on_reset_click)

        # Per-table HTML widgets are disabled because the tables are not well-formed;
        # tables are rendered through self._tables_out instead.

    def _display_widgets(self):
        """Arrange the widgets into boxes and display them."""
        registry_box = widgets.VBox(
            (
                self._remote_path_text,
                self._local_path_text,
                self._spark_cluster_text,
                self._log_file_text,
            )
        )
        options_box = widgets.VBox((self._online_mode_cbox, self._sync_cbox))

        register_project_box = widgets.HBox(
            (self._register_project_btn, self._project_file_text, self._project_file_ex)
        )
        register_and_submit_dataset_box = widgets.HBox(
            (
                self._register_and_submit_dataset_btn,
                widgets.VBox(
                    (
                        widgets.HBox((self._dataset_file_text, self._dataset_file_ex)),
                        self._dataset_path_text,
                        widgets.HBox(
                            (self._dimension_mapping_label, self._dimension_mapping_text)
                        ),
                        self._dataset_project_id_dd,
                    ),
                ),
            ),
        )
        log_box = widgets.HBox((self._log_message_label, self._log_message_text))
        register_box = widgets.VBox(
            (register_project_box, register_and_submit_dataset_box, log_box)
        )

        show_dims_box = widgets.HBox(
            (
                self._show_dimensions_btn,
                self._dim_filter_message_text,
                self._dimensions_filter_text,
                self._project_dimensions_filter_text,
                self._project_dimensions_filter_dd,
            )
        )
        show_box = widgets.VBox(
            (
                self._show_projects_btn,
                self._show_datasets_btn,
                show_dims_box,
                self._show_dimension_mappings_btn,
                self._reset_tables_btn,
            )
        )

        display(
            self._main_label,
            widgets.HBox((registry_box, options_box)),
            self._load_btn,
            register_box,
            show_box,
            self._reset_btn,
        )

    def _enable_manager_actions(self):
        """Enable the widgets that need a loaded registry and refresh the project IDs.

        Must only be called once ``self._manager`` has been set.
        """
        self._register_project_btn.disabled = False
        self._register_and_submit_dataset_btn.disabled = False
        self._dataset_project_id_dd.disabled = False
        self._show_projects_btn.disabled = False
        self._show_datasets_btn.disabled = False
        self._show_dimensions_btn.disabled = False
        self._show_dimension_mappings_btn.disabled = False
        self._project_dimensions_filter_dd.disabled = False
        self._update_project_ids()
        # Run the show handlers inside a scratch Output that is cleared afterwards,
        # so that whatever they display is discarded.
        out = widgets.Output()
        with out:
            self._on_show_projects_click(self._show_projects_btn)
            self._on_show_datasets_click(self._show_datasets_btn)
        out.clear_output()

    def _on_online_click(self, _):
        """Force and lock "Sync pull" while online mode is checked."""
        # Syncing is always enabled when in online mode.
        if self._online_mode_cbox.value:
            self._sync_cbox.value = True
        self._sync_cbox.disabled = self._online_mode_cbox.value

    def _on_load_click(self, _):
        """Configure logging, start Spark if a cluster is given, and load the registry."""
        # TODO: We should log to an Output widget that gets updated periodically.
        logger = setup_logging(__name__, self._log_file_text.value, mode="a")
        if (
            self._spark_cluster_text.value not in ("local mode", "")
            and SparkSession.getActiveSession() is None
        ):
            os.environ["SPARK_CLUSTER"] = self._spark_cluster_text.value
            # Capture and then discard anything displayed during Spark initialization.
            out = widgets.Output()
            with out:
                init_spark()
            out.clear_output()

        sync = self._sync_cbox.value
        online = self._online_mode_cbox.value
        # NOTE(review): the "Local registry" text is only used in the failure message
        # below; the connection is built with its defaults. Confirm this is intended.
        conn = DatabaseConnection()
        try:
            if sync and not online:
                # This exists only to sync data locally.
                RegistryManager.load(
                    conn,
                    remote_path=self._remote_path_text.value,
                    offline_mode=False,
                    user=getpass.getuser(),
                )
            self._manager = RegistryManager.load(
                conn,
                remote_path=self._remote_path_text.value,
                offline_mode=not online,
                user=getpass.getuser(),
            )
        except DSGBaseException:
            logger.exception("Failed to load registry %s", self._local_path_text.value)
            return

        self._enable_manager_actions()

    def _update_project_ids(self):
        """Reload the project IDs from the registry into both project dropdowns."""
        # Keep the leading "" entry and replace everything after it.
        self._project_ids[1:] = self._manager.project_manager.list_ids()
        self._project_dimensions_filter_dd.options = self._project_ids
        self._project_dimensions_filter_dd.value = self._project_ids[0]
        self._dataset_project_id_dd.options = self._project_ids
        self._dataset_project_id_dd.value = self._project_ids[0]

    def _on_register_project_click(self, _):
        """Register the project described by the project file input."""
        # Validate the raw text: str(Path("")) is ".", so an emptiness check made
        # after converting to Path can never succeed.
        project_file_text = self._project_file_text.value.strip()
        if not project_file_text:
            print("project_file cannot be empty", file=sys.stderr)
            return
        project_file = Path(project_file_text)
        if not self._registration_pre_check():
            return
        try:
            self._manager.project_manager.register(
                project_file, submitter=getpass.getuser(), log_message=self._log_message_text.value
            )
        except DSGBaseException:
            logger.exception("Failed to register project %s", project_file)
            return

        self._update_project_ids()
        self._post_registration_handling()

    def _on_register_and_submit_dataset_click(self, _):
        """Register the dataset from the dataset inputs and submit it to the chosen project."""
        # Validate the raw text values; see _on_register_project_click for why.
        dataset_file_text = self._dataset_file_text.value.strip()
        if not dataset_file_text:
            print("dataset_file cannot be empty", file=sys.stderr)
            return
        dataset_file = Path(dataset_file_text)

        dataset_path_text = self._dataset_path_text.value.strip()
        if not dataset_path_text:
            print("dataset_path cannot be empty", file=sys.stderr)
            return
        dataset_path = Path(dataset_path_text)

        # The dimension mapping file is optional; pass None when it is left blank.
        mapping_text = self._dimension_mapping_text.value.strip()
        dimension_mapping_file = Path(mapping_text) if mapping_text else None

        project_id = self._dataset_project_id_dd.value
        if project_id == "":
            print("project_id cannot be empty", file=sys.stderr)
            return
        if not self._registration_pre_check():
            return
        try:
            self._manager.project_manager.register_and_submit_dataset(
                dataset_file,
                dataset_path,
                project_id,
                dimension_mapping_file=dimension_mapping_file,
                submitter=getpass.getuser(),
                log_message=self._log_message_text.value,
            )
        except DSGBaseException:
            logger.exception("Failed to register and submit dataset %s", dataset_file)
            return

        self._post_registration_handling()
        self._update_project_ids()

    def _registration_pre_check(self):
        """Return True if a registration log message has been entered."""
        log_message = self._log_message_text.value
        if log_message == "":
            print("log_message cannot be empty", file=sys.stderr)
            return False
        return True

    def _post_registration_handling(self):
        """Clear the log message so that the next registration needs a new one."""
        self._log_message_text.value = ""

    def _on_show_projects_click(self, _):
        """Display the table of registered projects."""
        table = self._manager.project_manager.show(return_table=True)
        self._display_table("Projects", table)

    def _on_show_datasets_click(self, _):
        """Display the table of registered datasets."""
        table = self._manager.dataset_manager.show(return_table=True)
        self._display_table("Datasets", table)

    def _on_show_dimensions_click(self, _):
        """Display the table of dimensions, applying the text and project filters."""
        filters = [self._dimensions_filter_text.value]
        if filters == [""]:
            filters = None
        project_id = self._project_dimensions_filter_dd.value
        if project_id == "":
            dimension_ids = None
        else:
            # Restrict the table to the project's base and supplemental dimensions.
            project_config = self._manager.project_manager.get_by_id(project_id)
            dimension_ids = {x.id for x in project_config.base_dimensions}
            for key in project_config.supplemental_dimensions:
                dimension_ids.add(key.id)

        table = self._manager.dimension_manager.show(
            filters=filters, dimension_ids=dimension_ids, return_table=True
        )
        self._display_table("Dimensions", table)

    def _display_table(self, name, table):
        """Replace the currently displayed table with ``table`` under the heading ``name``."""
        self._tables_out.clear_output()
        self._tables_out = widgets.Output()
        with self._tables_out:
            display(HTML(f"<b>{name}</b>"))
            display(HTML(table.get_html_string()))
        display(self._tables_out)

    def _on_show_dimension_mappings_click(self, _):
        """Display the table of registered dimension mappings."""
        table = self._manager.dimension_mapping_manager.show(return_table=True)
        self._display_table("Dimension Mappings", table)

    def _reset_tables_click(self, _):
        """Clear the displayed table."""
        self._tables_out.clear_output()

    def _on_reset_click(self, _):
        """Close all widgets and rebuild the GUI, keeping any loaded registry."""
        # close_all() closes every open widget, not just the ones owned by this
        # object, so a single call is sufficient.
        widgets.Widget.close_all()
        # The previous table output was closed above; start with a fresh one.
        self._tables_out = widgets.Output()
        self._make_widgets()
        self._display_widgets()
        # Registry-dependent actions can only be enabled once a registry has been
        # loaded; otherwise self._manager is None and the refresh would fail.
        if self._manager is not None:
            self._enable_manager_actions()
dsgrid/chronify.py ADDED
@@ -0,0 +1,32 @@
1
+ from contextlib import contextmanager
2
+ from pathlib import Path
3
+ from typing import Generator
4
+
5
+ import chronify
6
+
7
+ import dsgrid
8
+ from dsgrid.common import BackendEngine
9
+
10
+
11
@contextmanager
def create_store(store_file: Path) -> Generator[chronify.Store, None, None]:
    """Yield a chronify Store chosen by the dsgrid runtime configuration.

    A Hive store at the configured thrift server URL is created when the backend
    engine is Spark; otherwise a file database is created at ``store_file``.
    The store is disposed when the context exits, including on error.
    """
    runtime = dsgrid.runtime_config
    uses_spark = runtime.backend_engine == BackendEngine.SPARK
    store = (
        chronify.Store.create_new_hive_store(runtime.thrift_server_url)
        if uses_spark
        else chronify.Store.create_file_db(store_file)
    )
    try:
        yield store
    finally:
        store.dispose()
23
+
24
+
25
@contextmanager
def create_in_memory_store() -> Generator[chronify.Store, None, None]:
    """Yield an in-memory chronify Store and dispose of it when the context exits."""
    in_memory_store = chronify.Store.create_in_memory_db()
    try:
        yield in_memory_store
    finally:
        # Runs on normal exit and on error alike.
        in_memory_store.dispose()
dsgrid/cli/__init__.py ADDED
File without changes