dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import requests
|
|
3
|
+
from dash import Dash, dash_table, dcc, html, Input, Output, State
|
|
4
|
+
import dash_bootstrap_components as dbc
|
|
5
|
+
|
|
6
|
+
from dash.exceptions import PreventUpdate
|
|
7
|
+
|
|
8
|
+
from dsgrid.api.response_models import ListProjectsResponse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Default base URL of the dsgrid API server.
DSGRID_API_URL = "http://127.0.0.1:8000"

# DataTable styles copied from https://gist.github.com/marcogoldin/8fc4c3945cef17ca38d55c4e17ebbbe6
# The font stack is assembled from its individual entries; the joined string is the CSS value.
_FONT_STACK = ",".join(
    (
        "-apple-system",
        "BlinkMacSystemFont",
        '"Segoe UI"',
        "Roboto",
        '"Helvetica Neue"',
        "Arial",
        '"Noto Sans"',
        "sans-serif",
        '"Apple Color Emoji"',
        '"Segoe UI Emoji"',
        '"Segoe UI Symbol"',
        '"Noto Color Emoji"',
    )
)
STYLE_TABLE = {"fontFamily": _FONT_STACK}
STYLE_HEADER = {
    "backgroundColor": "white",
    "fontWeight": "bold",
    "padding": "0.75rem",
}
# Cells share the table's font stack.
STYLE_CELL = dict(
    fontFamily=STYLE_TABLE["fontFamily"],
    fontWeight="400",
    lineHeight="1.5",
    color="#212529",
    textAlign="left",
    whiteSpace="normal",
    height="auto",
    padding="0.75rem",
    border="1px solid #dee2e6",
    verticalAlign="top",
)
# Shade odd rows so that adjacent rows are visually distinct.
STYLE_DATA_CONDITIONAL = [
    {
        "if": {"row_index": "odd"},
        "backgroundColor": "#f8f9fa",
    },
]
|
|
33
|
+
|
|
34
|
+
# The Dash application and its static layout. The callbacks in this module are attached to
# the component ids declared here.
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])
app.layout = dbc.Container(
    [
        dbc.Alert("dsgrid Project Viewer", color="info"),
        # Connection row: API URL, connection status and the connect button.
        html.Div(
            [
                "URL: ",
                dcc.Input(
                    id="url_text",
                    value=DSGRID_API_URL,
                ),
                # Dash style dictionaries use camelCase keys (marginLeft, not margin-left),
                # matching the other style dictionaries in this module.
                html.Label("Status:", style={"marginLeft": "15px", "marginRight": "15px"}),
                dcc.Input(
                    id="status_text",
                    value="disconnected",
                ),
                dbc.Button(
                    "Connect",
                    outline=True,
                    color="primary",
                    id="connect_button",
                    n_clicks=0,
                    style={"marginLeft": "15px"},
                ),
            ]
        ),
        html.Br(),
        # Project selection; the options are filled in by the connect callback.
        html.Div(
            [
                "Select a project",
                dcc.Dropdown([], "", id="project_dd"),
            ],
        ),
        html.Br(),
        dbc.Button(
            "List dimensions",
            outline=True,
            color="primary",
            id="list_dimensions_button",
            n_clicks=0,
        ),
        # An empty table is declared up front so that the "dimensions_table" id exists
        # before any callback replaces the container's contents.
        html.Div(
            [
                dash_table.DataTable([], [], id="dimensions_table"),
                html.Div(id="dimensions_table_container"),
            ],
        ),
        html.Br(),
        html.H5("Dimension Records"),
        # Read-only field showing the name of the dimension whose records are listed.
        dcc.Input(
            id="dimension_name",
            value="",
            readOnly=True,
        ),
        html.Div(
            [
                dash_table.DataTable([], [], id="dimension_records_table"),
                html.Div(id="dimension_records_table_container"),
            ]
        ),
    ],
    className="m-5",
    # fluid=True,
    # Per the docs, this should be set to True. However, that causes a display issue where the
    # right part of the GUI is hidden and so you have to scroll horizontally to see it.
    # When passing the allowed parameters, like lg, xl, xxl, the width is too small.
    # Passing a non-supported string makes it work. This will almost certainly break in the future.
    fluid="invalidparameter",
)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@app.callback(
    Output("project_dd", "options"),
    Output("status_text", "value"),
    Input("connect_button", "n_clicks"),
    State("url_text", "value"),
)
def on_connect(n_clicks, url):
    """Fetch the project IDs from the API at ``url`` and mark the viewer as connected.

    Returns a tuple of (project dropdown options, status text). The update is skipped when
    ``n_clicks`` is None.
    """
    if n_clicks is not None:
        project_ids = list_project_ids(url)
        return project_ids, "connected"
    raise PreventUpdate
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@app.callback(
    Output("project_dd", "value"),
    Input("project_dd", "options"),
)
def on_project_options_change(options):
    """Select the first available project, or clear the selection when there are none."""
    return options[0] if options else ""
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@app.callback(
    Output("dimensions_table_container", "children"),
    Input("list_dimensions_button", "n_clicks"),
    State("project_dd", "value"),
    State("url_text", "value"),
)
def on_list_dimensions(n_clicks, project_id, url):
    """Build a selectable table of the dimensions of the chosen project.

    Parameters
    ----------
    n_clicks : int | None
        Click count of the "List dimensions" button.
    project_id : str | None
        Value of the project dropdown.
    url : str
        Base URL of the dsgrid API.

    Returns
    -------
    dash_table.DataTable
        Table with one row per dimension; a single row can be selected.

    Raises
    ------
    PreventUpdate
        If the button has no click count or no project is selected.
    """
    # Test truthiness rather than equality with "": the dropdown value may also be None
    # (for example after the selection is cleared), which must not be sent to the API.
    if n_clicks is None or not project_id:
        raise PreventUpdate
    table = list_project_dimensions(project_id, url)
    # Column names come from the first row. A project without dimensions produces an empty
    # table instead of an IndexError.
    columns = [{"name": x, "id": x} for x in table[0]] if table else []
    return dash_table.DataTable(
        table,
        columns,
        id="dimensions_table",
        editable=False,
        filter_action="native",
        sort_action="native",
        row_selectable="single",
        selected_rows=[],
        style_table=STYLE_TABLE,
        style_header=STYLE_HEADER,
        style_cell=STYLE_CELL,
        style_data_conditional=STYLE_DATA_CONDITIONAL,
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@app.callback(
    Output("dimension_records_table_container", "children"),
    Output("dimension_name", "value"),
    Input("dimensions_table", "derived_viewport_selected_rows"),
    Input("dimensions_table", "derived_viewport_data"),
    State("url_text", "value"),
)
def on_list_dimension_records(row_indexes, row_data, url):
    """Show the records of the dimension selected in the dimensions table.

    Returns a tuple of (records table, name of the selected dimension). Each column header
    includes the number of unique values in that column. The update is skipped when no row
    is selected or the dimension has no records.
    """
    if not row_indexes:
        raise PreventUpdate

    # Only the first selected row is used.
    selected = row_data[row_indexes[0]]
    records = list_dimension_records(selected["dimension_id"], url)
    if not records:
        raise PreventUpdate

    frame = pd.DataFrame.from_records(records)
    # Columns follow the keys of the first record.
    columns = [
        {"name": f"{key} ({frame[key].nunique()} unique)", "id": key} for key in records[0]
    ]
    records_table = dash_table.DataTable(
        records,
        columns,
        id="dimension_records_table",
        editable=False,
        filter_action="native",
        sort_action="native",
        style_table=STYLE_TABLE,
        style_header=STYLE_HEADER,
        style_cell=STYLE_CELL,
        style_data_conditional=STYLE_DATA_CONDITIONAL,
    )
    return records_table, selected["name"]
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def list_project_ids(url):
    """Return the IDs of all projects reported by the API's ``projects`` endpoint."""
    payload = check_request("projects", url)
    parsed = ListProjectsResponse(**payload)
    return [project.project_id for project in parsed.projects]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def list_project_dimensions(project_id, url):
    """Return the ``dimensions`` entry of the API response for the given project."""
    payload = check_request(f"projects/{project_id}/dimensions", url)
    return payload["dimensions"]
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def list_dimension_records(dimension_id, url):
    """Return the ``records`` entry of the API response for the given dimension."""
    payload = check_request(f"dimensions/records/{dimension_id}", url)
    return payload["records"]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def check_request(endpoint, url, timeout=30):
    """Send a GET request to an API endpoint and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Path of the endpoint relative to ``url``, without a leading slash.
    url : str
        Base URL of the dsgrid API. A trailing slash is ignored.
    timeout : float
        Seconds to wait for the server before giving up. Without a timeout, an unresponsive
        server would block the calling callback indefinitely.

    Returns
    -------
    Any
        The response body parsed as JSON.

    Raises
    ------
    Exception
        If the response status code is not 200.
    """
    # Avoid a double slash in the target when the user enters the URL with a trailing slash.
    target = f"{url.rstrip('/')}/{endpoint}"
    response = requests.get(target, timeout=timeout)
    if response.status_code != 200:
        msg = f"request to {target} failed: {response.status_code}"
        raise Exception(msg)
    return response.json()
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def main():
    """Run the project viewer with Dash's debug mode enabled."""
    # NOTE(review): debug mode is presumably meant for local development only - confirm
    # before exposing this server beyond localhost.
    app.run(debug=True)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import getpass
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from IPython.display import display, HTML
|
|
9
|
+
import ipywidgets as widgets
|
|
10
|
+
|
|
11
|
+
from dsgrid.common import REMOTE_REGISTRY, LOCAL_REGISTRY
|
|
12
|
+
from dsgrid.exceptions import DSGBaseException
|
|
13
|
+
from dsgrid.registry.registry_database import DatabaseConnection
|
|
14
|
+
from dsgrid.registry.registry_manager import RegistryManager
|
|
15
|
+
from dsgrid.loggers import setup_logging
|
|
16
|
+
from dsgrid.spark.types import SparkSession
|
|
17
|
+
from dsgrid.utils.spark import init_spark
|
|
18
|
+
|
|
19
|
+
SS_PROJECT = "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project/project.json5"
|
|
20
|
+
RS_DATASET = "https://github.com/dsgrid/dsgrid-project-StandardScenarios/blob/main/dsgrid_project/datasets/modeled/resstock/dataset.json5"
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RegistrationGui:
    """Provides a UI for registering dsgrid projects and datasets.

    The widgets are created and displayed as soon as an instance is constructed. Most actions
    are disabled until a registry has been loaded with the "Load registry" button.

    Parameters
    ----------
    defaults : dict | None
        Optional overrides for the entries of ``DEFAULTS``, which provide the initial values
        of the text fields.
    """

    # Initial values of the text fields. The environment variables are read once, when the
    # class is defined.
    DEFAULTS = {
        "remote_registry": REMOTE_REGISTRY,
        "local_registry": LOCAL_REGISTRY,
        "project_file": "",
        "dataset_file": "",
        "dataset_path": "",
        "dimension_mapping_file": "",
        "dimensions_filter": "",
        "log_file": Path(os.environ.get("DSGRID_LOG_FILE_PATH", ".")) / "dsgrid.log",
        "log_message": "",
        "spark_cluster": os.environ.get("SPARK_CLUSTER", "local mode"),
    }

    def __init__(self, defaults=None):
        self._manager = None
        self._defaults = copy.deepcopy(self.DEFAULTS)
        if defaults is not None:
            self._defaults.update(defaults)
        # The first entry is the "no project selected" choice of the dropdowns.
        self._project_ids = [""]
        self._make_widgets()
        self._display_widgets()
        self._tables_out = widgets.Output()

    @property
    def manager(self):
        """The loaded registry manager, or None if no registry has been loaded."""
        return self._manager

    @property
    def dimension_manager(self):
        """The dimension manager of the loaded registry."""
        return self._manager.dimension_manager

    @property
    def dimension_mapping_manager(self):
        """The dimension mapping manager of the loaded registry."""
        return self._manager.dimension_mapping_manager

    @property
    def dataset_manager(self):
        """The dataset manager of the loaded registry."""
        return self._manager.dataset_manager

    @property
    def project_manager(self):
        """The project manager of the loaded registry."""
        return self._manager.project_manager

    @staticmethod
    def _text_to_path(text):
        """Return ``text`` as a Path, or None if it is empty or only whitespace.

        The emptiness check must happen on the text: ``Path("")`` is the current directory
        and renders as ".", so an empty field cannot be detected after the conversion.
        """
        stripped = text.strip()
        return Path(stripped) if stripped else None

    def _make_widgets(self):
        """Create all widgets and connect them to their event handlers."""
        self._main_label = widgets.HTML("<b>dsgrid Registration Tool</b>")
        text_layout = widgets.Layout(width="400px")
        button_layout = widgets.Layout(width="200px")
        self._remote_path_text = widgets.Text(
            str(self._defaults["remote_registry"]),
            description="Remote registry",
            layout=text_layout,
        )
        self._local_path_text = widgets.Text(
            str(self._defaults["local_registry"]),
            description="Local registry",
            layout=text_layout,
        )
        self._spark_cluster_text = widgets.Text(
            self._defaults["spark_cluster"],
            description="Spark cluster",
            layout=text_layout,
        )
        log_file = self._defaults["log_file"]
        # TODO: setup detection of changes to this text box and reconfigure logging
        self._log_file_text = widgets.Text(
            str(log_file),
            description="Log file",
            layout=text_layout,
        )
        self._online_mode_cbox = widgets.Checkbox(
            value=False,
            description="Online mode",
        )
        self._online_mode_cbox.observe(self._on_online_click, names="value")
        self._sync_cbox = widgets.Checkbox(
            value=True,
            description="Sync pull",
        )
        self._load_btn = widgets.Button(description="Load registry", layout=button_layout)
        self._load_btn.on_click(self._on_load_click)
        self._register_project_btn = widgets.Button(
            description="Register project", disabled=True, layout=button_layout
        )
        self._register_project_btn.on_click(self._on_register_project_click)
        self._project_file_text = widgets.Text(
            str(self._defaults["project_file"]),
            description="Project File",
            placeholder="project.json5",
        )
        self._project_file_ex = widgets.HTML(
            f"<a href={SS_PROJECT} target='_blank'>Example: Standard Scenarios</a>"
        )
        self._register_and_submit_dataset_btn = widgets.Button(
            description="Register and submit dataset", disabled=True, layout=button_layout
        )
        self._register_and_submit_dataset_btn.on_click(self._on_register_and_submit_dataset_click)
        self._dataset_file_ex = widgets.HTML(
            f"<a href={RS_DATASET} target='_blank'>Example: ResStock</a>"
        )
        self._dataset_file_text = widgets.Text(
            str(self._defaults["dataset_file"]),
            description="Dataset File",
            placeholder="dataset.json5",
        )
        self._dataset_path_text = widgets.Text(
            self._defaults["dataset_path"],
            description="Dataset Path",
            placeholder="load_data_path",
        )
        self._dimension_mapping_label = widgets.HTML("Dimension mapping file")
        self._dimension_mapping_text = widgets.Text(
            str(self._defaults["dimension_mapping_file"]), placeholder="dimension_mappings.json5"
        )
        self._dataset_project_id_dd = widgets.Dropdown(
            description="Project ID",
            options=self._project_ids,
            value=self._project_ids[0],
            disabled=True,
        )
        self._log_message_label = widgets.HTML("Registration log message")
        self._log_message_text = widgets.Text(
            self._defaults["log_message"], layout=widgets.Layout(width="400px")
        )
        self._show_projects_btn = widgets.Button(
            disabled=True,
            description="Show projects",
            tooltip="Display a table showing all registered projects",
        )
        self._show_projects_btn.on_click(self._on_show_projects_click)
        self._show_datasets_btn = widgets.Button(
            disabled=True,
            description="Show datasets",
            tooltip="Display a table showing all registered datasets",
        )
        self._show_datasets_btn.on_click(self._on_show_datasets_click)
        self._show_dimensions_btn = widgets.Button(
            disabled=True,
            description="Show dimensions",
            tooltip="Display a table showing all registered dimensions",
        )
        self._show_dimensions_btn.on_click(self._on_show_dimensions_click)
        self._dim_filter_message_text = widgets.HTML("Filter dimensions")
        self._dimensions_filter_text = widgets.Text(
            self._defaults["dimensions_filter"], placeholder="Type == geography"
        )
        self._project_dimensions_filter_text = widgets.HTML("Filter dimensions by project")
        self._project_dimensions_filter_dd = widgets.Dropdown(
            options=self._project_ids,
            value=self._project_ids[0],
            disabled=True,
        )
        self._show_dimension_mappings_btn = widgets.Button(
            disabled=True,
            description="Show mappings",
            tooltip="Display a table showing all registered dimension mappings",
        )
        self._show_dimension_mappings_btn.on_click(self._on_show_dimension_mappings_click)
        self._reset_tables_btn = widgets.Button(description="Reset tables")
        self._reset_tables_btn.on_click(self._reset_tables_click)
        self._reset_btn = widgets.Button(description="Reset all")
        self._reset_btn.on_click(self._on_reset_click)
        # NOTE: Per-table HTML widgets are intentionally not created because those tables
        # are not well-formed. Tables are rendered through _display_table instead.

    def _display_widgets(self):
        """Arrange the widgets into boxes and display them."""
        registry_box = widgets.VBox(
            (
                self._remote_path_text,
                self._local_path_text,
                self._spark_cluster_text,
                self._log_file_text,
            )
        )
        options_box = widgets.VBox((self._online_mode_cbox, self._sync_cbox))

        register_project_box = widgets.HBox(
            (self._register_project_btn, self._project_file_text, self._project_file_ex)
        )
        register_and_submit_dataset_box = widgets.HBox(
            (
                self._register_and_submit_dataset_btn,
                widgets.VBox(
                    (
                        widgets.HBox((self._dataset_file_text, self._dataset_file_ex)),
                        self._dataset_path_text,
                        widgets.HBox(
                            (self._dimension_mapping_label, self._dimension_mapping_text)
                        ),
                        self._dataset_project_id_dd,
                    ),
                ),
            ),
        )
        log_box = widgets.HBox((self._log_message_label, self._log_message_text))
        register_box = widgets.VBox(
            (register_project_box, register_and_submit_dataset_box, log_box)
        )

        show_dims_box = widgets.HBox(
            (
                self._show_dimensions_btn,
                self._dim_filter_message_text,
                self._dimensions_filter_text,
                self._project_dimensions_filter_text,
                self._project_dimensions_filter_dd,
            )
        )
        show_box = widgets.VBox(
            (
                self._show_projects_btn,
                self._show_datasets_btn,
                show_dims_box,
                self._show_dimension_mappings_btn,
                self._reset_tables_btn,
            )
        )

        display(
            self._main_label,
            widgets.HBox((registry_box, options_box)),
            self._load_btn,
            register_box,
            show_box,
            self._reset_btn,
        )

    def _enable_manager_actions(self):
        """Enable the widgets that need a loaded registry and refresh the project IDs.

        Must only be called after a registry has been loaded.
        """
        self._register_project_btn.disabled = False
        self._register_and_submit_dataset_btn.disabled = False
        self._dataset_project_id_dd.disabled = False
        self._show_projects_btn.disabled = False
        self._show_datasets_btn.disabled = False
        self._show_dimensions_btn.disabled = False
        self._show_dimension_mappings_btn.disabled = False
        self._project_dimensions_filter_dd.disabled = False
        self._update_project_ids()
        # Run the show handlers once inside a throwaway Output widget, then discard what
        # they printed.
        out = widgets.Output()
        with out:
            self._on_show_projects_click(self._show_projects_btn)
            self._on_show_datasets_click(self._show_datasets_btn)
        out.clear_output()

    def _on_online_click(self, _):
        """Force and lock the sync checkbox while online mode is selected."""
        # Syncing is always enabled when in online mode.
        if self._online_mode_cbox.value:
            self._sync_cbox.value = True
        self._sync_cbox.disabled = self._online_mode_cbox.value

    def _on_load_click(self, _):
        """Load the registry and enable the registry-dependent actions on success."""
        # TODO: We should log to an Output widget that gets updated periodically.
        log = setup_logging(__name__, self._log_file_text.value, mode="a")
        if (
            self._spark_cluster_text.value not in ("local mode", "")
            and SparkSession.getActiveSession() is None
        ):
            os.environ["SPARK_CLUSTER"] = self._spark_cluster_text.value
            # Capture and discard whatever init_spark prints.
            out = widgets.Output()
            with out:
                init_spark()
            out.clear_output()

        sync = self._sync_cbox.value
        online = self._online_mode_cbox.value
        conn = DatabaseConnection()
        try:
            if sync and not online:
                # This exists only to sync data locally.
                RegistryManager.load(
                    conn,
                    remote_path=self._remote_path_text.value,
                    offline_mode=False,
                    user=getpass.getuser(),
                )
            self._manager = RegistryManager.load(
                conn,
                remote_path=self._remote_path_text.value,
                offline_mode=not online,
                user=getpass.getuser(),
            )
        except DSGBaseException:
            log.exception("Failed to load registry %s", self._local_path_text.value)
            return

        self._enable_manager_actions()

    def _update_project_ids(self):
        """Refresh both project dropdowns from the registry and clear their selection."""
        # Keep the leading "" entry and replace everything after it.
        self._project_ids[1:] = self._manager.project_manager.list_ids()
        self._project_dimensions_filter_dd.options = self._project_ids
        self._project_dimensions_filter_dd.value = self._project_ids[0]
        self._dataset_project_id_dd.options = self._project_ids
        self._dataset_project_id_dd.value = self._project_ids[0]

    def _on_register_project_click(self, _):
        """Register the project described by the project file field."""
        project_file = self._text_to_path(self._project_file_text.value)
        if project_file is None:
            print("project_file cannot be empty", file=sys.stderr)
            return
        if not self._registration_pre_check():
            return
        try:
            self._manager.project_manager.register(
                project_file, submitter=getpass.getuser(), log_message=self._log_message_text.value
            )
        except DSGBaseException:
            logger.exception("Failed to register project %s", project_file)
            return

        self._update_project_ids()
        self._post_registration_handling()

    def _on_register_and_submit_dataset_click(self, _):
        """Register the dataset from the dataset fields and submit it to the chosen project."""
        dataset_file = self._text_to_path(self._dataset_file_text.value)
        if dataset_file is None:
            print("dataset_file cannot be empty", file=sys.stderr)
            return
        dataset_path = self._text_to_path(self._dataset_path_text.value)
        if dataset_path is None:
            print("dataset_path cannot be empty", file=sys.stderr)
            return
        # The dimension mapping file is optional; an empty field is passed on as None.
        dimension_mapping_file = self._text_to_path(self._dimension_mapping_text.value)
        project_id = self._dataset_project_id_dd.value
        if project_id == "":
            print("project_id cannot be empty", file=sys.stderr)
            return
        if not self._registration_pre_check():
            return
        try:
            self._manager.project_manager.register_and_submit_dataset(
                dataset_file,
                dataset_path,
                project_id,
                dimension_mapping_file=dimension_mapping_file,
                submitter=getpass.getuser(),
                log_message=self._log_message_text.value,
            )
        except DSGBaseException:
            logger.exception("Failed to register and submit dataset %s", dataset_file)
            return

        self._post_registration_handling()
        self._update_project_ids()

    def _registration_pre_check(self):
        """Return True if a log message was entered; otherwise report the problem."""
        if self._log_message_text.value == "":
            print("log_message cannot be empty", file=sys.stderr)
            return False
        return True

    def _post_registration_handling(self):
        """Clear the log message so that it is not reused for the next registration."""
        self._log_message_text.value = ""

    def _on_show_projects_click(self, _):
        """Display the table of registered projects."""
        table = self._manager.project_manager.show(return_table=True)
        self._display_table("Projects", table)

    def _on_show_datasets_click(self, _):
        """Display the table of registered datasets."""
        table = self._manager.dataset_manager.show(return_table=True)
        self._display_table("Datasets", table)

    def _on_show_dimensions_click(self, _):
        """Display the table of dimensions, optionally filtered by text and by project."""
        filter_text = self._dimensions_filter_text.value
        filters = None if filter_text == "" else [filter_text]
        project_id = self._project_dimensions_filter_dd.value
        if project_id == "":
            dimension_ids = None
        else:
            # Restrict the table to the base and supplemental dimensions of the project.
            project_config = self._manager.project_manager.get_by_id(project_id)
            dimension_ids = {x.id for x in project_config.base_dimensions}
            dimension_ids.update(x.id for x in project_config.supplemental_dimensions)

        table = self._manager.dimension_manager.show(
            filters=filters, dimension_ids=dimension_ids, return_table=True
        )
        self._display_table("Dimensions", table)

    def _display_table(self, name, table):
        """Replace the currently displayed table with ``table`` under the heading ``name``."""
        self._tables_out.clear_output()
        self._tables_out = widgets.Output()
        with self._tables_out:
            display(HTML(f"<b>{name}</b>"))
            display(HTML(table.get_html_string()))
        display(self._tables_out)

    def _on_show_dimension_mappings_click(self, _):
        """Display the table of registered dimension mappings."""
        table = self._manager.dimension_mapping_manager.show(return_table=True)
        self._display_table("Dimension Mappings", table)

    def _reset_tables_click(self, _):
        """Remove the currently displayed table."""
        self._tables_out.clear_output()

    def _on_reset_click(self, _):
        """Close the widgets and rebuild the UI from the defaults."""
        # NOTE: close_all() is not specific to the widget it is called on; it closes every
        # widget registered in the session. One call is therefore sufficient, and it also
        # closes the container boxes created in _display_widgets.
        widgets.Widget.close_all()
        self._make_widgets()
        self._display_widgets()
        # The previous output area was closed above; start with a fresh one.
        self._tables_out = widgets.Output()
        # The registry-dependent actions can only be enabled once a registry is loaded.
        # Without this guard, resetting before loading fails on the missing manager.
        if self._manager is not None:
            self._enable_manager_actions()
|
dsgrid/chronify.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Generator
|
|
4
|
+
|
|
5
|
+
import chronify
|
|
6
|
+
|
|
7
|
+
import dsgrid
|
|
8
|
+
from dsgrid.common import BackendEngine
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@contextmanager
def create_store(store_file: Path) -> Generator[chronify.Store, None, None]:
    """Yield a chronify Store selected by the dsgrid runtime configuration.

    When the configured backend engine is Spark, a Hive store is created from the configured
    thrift server URL and ``store_file`` is not used. Otherwise a file database is created
    from ``store_file``. The store is disposed when the context exits, including on error.
    """
    runtime = dsgrid.runtime_config
    use_spark = runtime.backend_engine == BackendEngine.SPARK
    store = (
        chronify.Store.create_new_hive_store(runtime.thrift_server_url)
        if use_spark
        else chronify.Store.create_file_db(store_file)
    )
    try:
        yield store
    finally:
        store.dispose()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@contextmanager
def create_in_memory_store() -> Generator[chronify.Store, None, None]:
    """Yield an in-memory chronify Store and dispose of it when the context exits."""
    memory_store = chronify.Store.create_in_memory_db()
    try:
        yield memory_store
    finally:
        # Runs on normal exit and when the body of the with-statement raises.
        memory_store.dispose()
|
dsgrid/cli/__init__.py
ADDED
|
File without changes
|