dsgrid-toolkit 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dsgrid-toolkit might be problematic. Click here for more details.

Files changed (152) hide show
  1. dsgrid/__init__.py +22 -0
  2. dsgrid/api/__init__.py +0 -0
  3. dsgrid/api/api_manager.py +179 -0
  4. dsgrid/api/app.py +420 -0
  5. dsgrid/api/models.py +60 -0
  6. dsgrid/api/response_models.py +116 -0
  7. dsgrid/apps/__init__.py +0 -0
  8. dsgrid/apps/project_viewer/app.py +216 -0
  9. dsgrid/apps/registration_gui.py +444 -0
  10. dsgrid/chronify.py +22 -0
  11. dsgrid/cli/__init__.py +0 -0
  12. dsgrid/cli/common.py +120 -0
  13. dsgrid/cli/config.py +177 -0
  14. dsgrid/cli/download.py +13 -0
  15. dsgrid/cli/dsgrid.py +142 -0
  16. dsgrid/cli/dsgrid_admin.py +349 -0
  17. dsgrid/cli/install_notebooks.py +62 -0
  18. dsgrid/cli/query.py +711 -0
  19. dsgrid/cli/registry.py +1773 -0
  20. dsgrid/cloud/__init__.py +0 -0
  21. dsgrid/cloud/cloud_storage_interface.py +140 -0
  22. dsgrid/cloud/factory.py +31 -0
  23. dsgrid/cloud/fake_storage_interface.py +37 -0
  24. dsgrid/cloud/s3_storage_interface.py +156 -0
  25. dsgrid/common.py +35 -0
  26. dsgrid/config/__init__.py +0 -0
  27. dsgrid/config/annual_time_dimension_config.py +187 -0
  28. dsgrid/config/common.py +131 -0
  29. dsgrid/config/config_base.py +148 -0
  30. dsgrid/config/dataset_config.py +684 -0
  31. dsgrid/config/dataset_schema_handler_factory.py +41 -0
  32. dsgrid/config/date_time_dimension_config.py +108 -0
  33. dsgrid/config/dimension_config.py +54 -0
  34. dsgrid/config/dimension_config_factory.py +65 -0
  35. dsgrid/config/dimension_mapping_base.py +349 -0
  36. dsgrid/config/dimension_mappings_config.py +48 -0
  37. dsgrid/config/dimensions.py +775 -0
  38. dsgrid/config/dimensions_config.py +71 -0
  39. dsgrid/config/index_time_dimension_config.py +76 -0
  40. dsgrid/config/input_dataset_requirements.py +31 -0
  41. dsgrid/config/mapping_tables.py +209 -0
  42. dsgrid/config/noop_time_dimension_config.py +42 -0
  43. dsgrid/config/project_config.py +1457 -0
  44. dsgrid/config/registration_models.py +199 -0
  45. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  46. dsgrid/config/simple_models.py +49 -0
  47. dsgrid/config/supplemental_dimension.py +29 -0
  48. dsgrid/config/time_dimension_base_config.py +200 -0
  49. dsgrid/data_models.py +155 -0
  50. dsgrid/dataset/__init__.py +0 -0
  51. dsgrid/dataset/dataset.py +123 -0
  52. dsgrid/dataset/dataset_expression_handler.py +86 -0
  53. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  54. dsgrid/dataset/dataset_schema_handler_base.py +899 -0
  55. dsgrid/dataset/dataset_schema_handler_one_table.py +196 -0
  56. dsgrid/dataset/dataset_schema_handler_standard.py +303 -0
  57. dsgrid/dataset/growth_rates.py +162 -0
  58. dsgrid/dataset/models.py +44 -0
  59. dsgrid/dataset/table_format_handler_base.py +257 -0
  60. dsgrid/dataset/table_format_handler_factory.py +17 -0
  61. dsgrid/dataset/unpivoted_table.py +121 -0
  62. dsgrid/dimension/__init__.py +0 -0
  63. dsgrid/dimension/base_models.py +218 -0
  64. dsgrid/dimension/dimension_filters.py +308 -0
  65. dsgrid/dimension/standard.py +213 -0
  66. dsgrid/dimension/time.py +531 -0
  67. dsgrid/dimension/time_utils.py +88 -0
  68. dsgrid/dsgrid_rc.py +88 -0
  69. dsgrid/exceptions.py +105 -0
  70. dsgrid/filesystem/__init__.py +0 -0
  71. dsgrid/filesystem/cloud_filesystem.py +32 -0
  72. dsgrid/filesystem/factory.py +32 -0
  73. dsgrid/filesystem/filesystem_interface.py +136 -0
  74. dsgrid/filesystem/local_filesystem.py +74 -0
  75. dsgrid/filesystem/s3_filesystem.py +118 -0
  76. dsgrid/loggers.py +132 -0
  77. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +950 -0
  78. dsgrid/notebooks/registration.ipynb +48 -0
  79. dsgrid/notebooks/start_notebook.sh +11 -0
  80. dsgrid/project.py +451 -0
  81. dsgrid/query/__init__.py +0 -0
  82. dsgrid/query/dataset_mapping_plan.py +142 -0
  83. dsgrid/query/derived_dataset.py +384 -0
  84. dsgrid/query/models.py +726 -0
  85. dsgrid/query/query_context.py +287 -0
  86. dsgrid/query/query_submitter.py +847 -0
  87. dsgrid/query/report_factory.py +19 -0
  88. dsgrid/query/report_peak_load.py +70 -0
  89. dsgrid/query/reports_base.py +20 -0
  90. dsgrid/registry/__init__.py +0 -0
  91. dsgrid/registry/bulk_register.py +161 -0
  92. dsgrid/registry/common.py +287 -0
  93. dsgrid/registry/config_update_checker_base.py +63 -0
  94. dsgrid/registry/data_store_factory.py +34 -0
  95. dsgrid/registry/data_store_interface.py +69 -0
  96. dsgrid/registry/dataset_config_generator.py +156 -0
  97. dsgrid/registry/dataset_registry_manager.py +734 -0
  98. dsgrid/registry/dataset_update_checker.py +16 -0
  99. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  100. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  101. dsgrid/registry/dimension_registry_manager.py +413 -0
  102. dsgrid/registry/dimension_update_checker.py +16 -0
  103. dsgrid/registry/duckdb_data_store.py +185 -0
  104. dsgrid/registry/filesystem_data_store.py +141 -0
  105. dsgrid/registry/filter_registry_manager.py +123 -0
  106. dsgrid/registry/project_config_generator.py +57 -0
  107. dsgrid/registry/project_registry_manager.py +1616 -0
  108. dsgrid/registry/project_update_checker.py +48 -0
  109. dsgrid/registry/registration_context.py +223 -0
  110. dsgrid/registry/registry_auto_updater.py +316 -0
  111. dsgrid/registry/registry_database.py +662 -0
  112. dsgrid/registry/registry_interface.py +446 -0
  113. dsgrid/registry/registry_manager.py +544 -0
  114. dsgrid/registry/registry_manager_base.py +367 -0
  115. dsgrid/registry/versioning.py +92 -0
  116. dsgrid/spark/__init__.py +0 -0
  117. dsgrid/spark/functions.py +545 -0
  118. dsgrid/spark/types.py +50 -0
  119. dsgrid/tests/__init__.py +0 -0
  120. dsgrid/tests/common.py +139 -0
  121. dsgrid/tests/make_us_data_registry.py +204 -0
  122. dsgrid/tests/register_derived_datasets.py +103 -0
  123. dsgrid/tests/utils.py +25 -0
  124. dsgrid/time/__init__.py +0 -0
  125. dsgrid/time/time_conversions.py +80 -0
  126. dsgrid/time/types.py +67 -0
  127. dsgrid/units/__init__.py +0 -0
  128. dsgrid/units/constants.py +113 -0
  129. dsgrid/units/convert.py +71 -0
  130. dsgrid/units/energy.py +145 -0
  131. dsgrid/units/power.py +87 -0
  132. dsgrid/utils/__init__.py +0 -0
  133. dsgrid/utils/dataset.py +612 -0
  134. dsgrid/utils/files.py +179 -0
  135. dsgrid/utils/filters.py +125 -0
  136. dsgrid/utils/id_remappings.py +100 -0
  137. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  138. dsgrid/utils/py_expression_eval/README.md +8 -0
  139. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  140. dsgrid/utils/py_expression_eval/tests.py +283 -0
  141. dsgrid/utils/run_command.py +70 -0
  142. dsgrid/utils/scratch_dir_context.py +64 -0
  143. dsgrid/utils/spark.py +918 -0
  144. dsgrid/utils/spark_partition.py +98 -0
  145. dsgrid/utils/timing.py +239 -0
  146. dsgrid/utils/utilities.py +184 -0
  147. dsgrid/utils/versioning.py +36 -0
  148. dsgrid_toolkit-0.2.0.dist-info/METADATA +216 -0
  149. dsgrid_toolkit-0.2.0.dist-info/RECORD +152 -0
  150. dsgrid_toolkit-0.2.0.dist-info/WHEEL +4 -0
  151. dsgrid_toolkit-0.2.0.dist-info/entry_points.txt +4 -0
  152. dsgrid_toolkit-0.2.0.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,116 @@
1
+ from dsgrid.data_models import DSGBaseModel
2
+ from dsgrid.config.dataset_config import DatasetConfigModel
3
+ from dsgrid.config.dimensions import DimensionCommonModel, ProjectDimensionModel
4
+ from dsgrid.config.project_config import ProjectConfigModel, ProjectDimensionNamesModel
5
+ from dsgrid.dataset.models import TableFormatType
6
+ from dsgrid.dimension.base_models import DimensionType
7
+ from dsgrid.query.models import ReportType
8
+ from .models import AsyncTaskModel
9
+
10
+
11
class ListProjectsResponse(DSGBaseModel):
    """Response payload for the list_projects command."""

    # One project configuration model per listed project.
    projects: list[ProjectConfigModel]
15
+
16
+
17
class GetProjectResponse(DSGBaseModel):
    """Response payload for the get_project command."""

    # Configuration model of the requested project.
    project: ProjectConfigModel
21
+
22
+
23
class ListDatasetsResponse(DSGBaseModel):
    """Response payload for the list_datasets command."""

    # One dataset configuration model per listed dataset.
    datasets: list[DatasetConfigModel]
27
+
28
+
29
class GetDatasetResponse(DSGBaseModel):
    """Response payload for the get_dataset command."""

    # Configuration model of the requested dataset.
    dataset: DatasetConfigModel
33
+
34
+
35
class ListProjectDimensionsResponse(DSGBaseModel):
    """Response payload for the list_project_dimensions command."""

    # Identifier of the project.
    project_id: str
    # Dimension models listed for that project.
    dimensions: list[ProjectDimensionModel]
40
+
41
+
42
class GetProjectDimensionNamesResponse(DSGBaseModel):
    """Response payload for the get_project_dimension_names command."""

    # Identifier of the project.
    project_id: str
    # Dimension names of the project, structured as a ProjectDimensionNamesModel.
    dimension_names: ProjectDimensionNamesModel
47
+
48
+
49
class GetProjectBaseDimensionNameResponse(DSGBaseModel):
    """Response payload for the get_project_dimension_name command."""

    # Identifier of the project.
    project_id: str
    # Dimension type the returned name applies to.
    dimension_type: DimensionType
    # Name of the dimension for that type.
    dimension_name: str
55
+
56
+
57
class ListProjectSupplementalDimensionNames(DSGBaseModel):
    """Response payload for the list_project_supplemental_dimension_names command."""

    # Identifier of the project.
    project_id: str
    # Dimension type the returned names apply to.
    dimension_type: DimensionType
    # Names of the dimensions for that type.
    dimension_names: list[str]
63
+
64
+
65
class ListDimensionTypesResponse(DSGBaseModel):
    """Response payload for the list_dimension_types command."""

    # Dimension types being listed.
    types: list[DimensionType]
69
+
70
+
71
class ListDimensionsResponse(DSGBaseModel):
    """Response payload for the list_dimensions command."""

    # One common dimension model per listed dimension.
    dimensions: list[DimensionCommonModel]
75
+
76
+
77
class GetDimensionResponse(DSGBaseModel):
    """Response payload for the get_dimension command."""

    # Common dimension model of the requested dimension.
    dimension: DimensionCommonModel
81
+
82
+
83
class ListDimensionRecordsResponse(DSGBaseModel):
    """Response payload for the list_dimension_records command."""

    # One plain dict per record; the key schema is not constrained by this model.
    records: list[dict]
87
+
88
+
89
class ListReportTypesResponse(DSGBaseModel):
    """Response payload for the list_report_types command."""

    # Report types being listed.
    types: list[ReportType]
93
+
94
+
95
class ListTableFormatTypesResponse(DSGBaseModel):
    """Response payload for the list_table_format_types command."""

    # Table format types being listed.
    types: list[TableFormatType]
99
+
100
+
101
class SparkSubmitProjectQueryResponse(DSGBaseModel):
    """Response payload for the submit_project_query command."""

    # Integer ID of an async task.
    # NOTE(review): presumably the task created for the submitted query - confirm in the API.
    async_task_id: int
105
+
106
+
107
class ListAsyncTasksResponse(DSGBaseModel):
    """Response payload for the list_async_tasks command."""

    # One task model per listed async task.
    async_tasks: list[AsyncTaskModel]
111
+
112
+
113
class GetAsyncTaskResponse(DSGBaseModel):
    """Response payload describing a single async task."""

    # NOTE(review): presumably returned by a get_async_task-style command; confirm the exact
    # command name against the API routes.
    async_task: AsyncTaskModel
File without changes
@@ -0,0 +1,216 @@
1
+ import pandas as pd
2
+ import requests
3
+ from dash import Dash, dash_table, dcc, html, Input, Output, State
4
+ import dash_bootstrap_components as dbc
5
+
6
+ from dash.exceptions import PreventUpdate
7
+
8
+ from dsgrid.api.response_models import ListProjectsResponse
9
+
10
+
11
# Default base URL of the dsgrid API server.
DSGRID_API_URL = "http://127.0.0.1:8000"

# DataTable styles copied from https://gist.github.com/marcogoldin/8fc4c3945cef17ca38d55c4e17ebbbe6
# The font stack is kept as individual entries so it is easy to read and edit; entries that
# contain spaces carry their own double quotes, as CSS requires.
_FONT_FAMILY = ",".join(
    (
        "-apple-system",
        "BlinkMacSystemFont",
        '"Segoe UI"',
        "Roboto",
        '"Helvetica Neue"',
        "Arial",
        '"Noto Sans"',
        "sans-serif",
        '"Apple Color Emoji"',
        '"Segoe UI Emoji"',
        '"Segoe UI Symbol"',
        '"Noto Color Emoji"',
    )
)
STYLE_TABLE = {"fontFamily": _FONT_FAMILY}
STYLE_HEADER = dict(backgroundColor="white", fontWeight="bold", padding="0.75rem")
STYLE_CELL = dict(
    fontFamily=STYLE_TABLE["fontFamily"],
    fontWeight="400",
    lineHeight="1.5",
    color="#212529",
    textAlign="left",
    whiteSpace="normal",
    height="auto",
    padding="0.75rem",
    border="1px solid #dee2e6",
    verticalAlign="top",
)
# Shade every odd-indexed row.
STYLE_DATA_CONDITIONAL = [{"if": {"row_index": "odd"}, "backgroundColor": "#f8f9fa"}]
33
+
34
app = Dash(__name__, external_stylesheets=[dbc.themes.SPACELAB])

# Row holding the API URL box, the connection status box, and the Connect button.
_connection_row = html.Div(
    [
        "URL: ",
        dcc.Input(id="url_text", value=DSGRID_API_URL),
        html.Label("Status:", style={"margin-left": "15px", "margin-right": "15px"}),
        dcc.Input(id="status_text", value="disconnected"),
        dbc.Button(
            "Connect",
            outline=True,
            color="primary",
            id="connect_button",
            n_clicks=0,
            style={"margin-left": "15px"},
        ),
    ]
)

# Project dropdown; it starts with no options and an empty selection.
_project_selector = html.Div(
    [
        "Select a project",
        dcc.Dropdown([], "", id="project_dd"),
    ],
)

_list_dimensions_button = dbc.Button(
    "List dimensions",
    outline=True,
    color="primary",
    id="list_dimensions_button",
    n_clicks=0,
)

# Empty placeholder table plus the container whose children a callback fills in.
_dimensions_section = html.Div(
    [
        dash_table.DataTable([], [], id="dimensions_table"),
        html.Div(id="dimensions_table_container"),
    ],
)

# Read-only box showing the name of the dimension whose records are displayed.
_dimension_name_box = dcc.Input(id="dimension_name", value="", readOnly=True)

# Empty placeholder table plus the container whose children a callback fills in.
_dimension_records_section = html.Div(
    [
        dash_table.DataTable([], [], id="dimension_records_table"),
        html.Div(id="dimension_records_table_container"),
    ]
)

app.layout = dbc.Container(
    [
        dbc.Alert("dsgrid Project Viewer", color="info"),
        _connection_row,
        html.Br(),
        _project_selector,
        html.Br(),
        _list_dimensions_button,
        _dimensions_section,
        html.Br(),
        html.H5("Dimension Records"),
        _dimension_name_box,
        _dimension_records_section,
    ],
    className="m-5",
    # Per the docs, fluid should be set to True. However, that causes a display issue where the
    # right part of the GUI is hidden and so you have to scroll horizontally to see it.
    # When passing the allowed parameters, like lg, xl, xxl, the width is too small.
    # Passing a non-supported string makes it work. This will almost certainly break in the future.
    fluid="invalidparameter",
)
103
+
104
+
105
@app.callback(
    Output("project_dd", "options"),
    Output("status_text", "value"),
    Input("connect_button", "n_clicks"),
    State("url_text", "value"),
)
def on_connect(n_clicks, url):
    """Fetch the project IDs from the API at ``url`` and report the connection status.

    Parameters
    ----------
    n_clicks
        Click count of the Connect button. The button is declared with ``n_clicks=0``, so the
        ``None`` guard below only fires if that initial value is ever removed.
    url
        Base URL of the dsgrid API, taken from the URL input box.

    Returns
    -------
    tuple
        ``(project_ids, "connected")`` on success, or ``([], "connection failed: ...")`` when the
        request or the response validation fails.
    """
    if n_clicks is None:
        raise PreventUpdate
    try:
        project_ids = list_project_ids(url)
    except Exception as exc:  # noqa: BLE001 - UI boundary: surface any failure in the status box
        # Without this, a failed reconnect left the old "connected" status and the old project
        # list on screen. Clearing the options also resets the selected project.
        return [], f"connection failed: {exc}"
    return project_ids, "connected"
115
+
116
+
117
@app.callback(
    Output("project_dd", "value"),
    Input("project_dd", "options"),
)
def on_project_options_change(options):
    """Pick the first project option as the dropdown value, or "" when there are no options."""
    return options[0] if options else ""
125
+
126
+
127
@app.callback(
    Output("dimensions_table_container", "children"),
    Input("list_dimensions_button", "n_clicks"),
    State("project_dd", "value"),
    State("url_text", "value"),
)
def on_list_dimensions(n_clicks, project_id, url):
    """Build the dimensions table for the selected project.

    Parameters
    ----------
    n_clicks
        Click count of the "List dimensions" button.
    project_id
        Currently selected project ID. An empty string or ``None`` means no project is selected.
    url
        Base URL of the dsgrid API, taken from the URL input box.

    Returns
    -------
    dash_table.DataTable
        A single-row-selectable table with one column per key of the first dimension entry. The
        table is empty, with no columns, when the project reports no dimensions.

    Raises
    ------
    PreventUpdate
        If there is nothing to look up (no click count or no selected project).
    """
    # `not project_id` covers "" and also None; presumably the dropdown reports None once it is
    # cleared, in which case `project_id == ""` would have let the lookup run for "None".
    if n_clicks is None or not project_id:
        raise PreventUpdate
    table = list_project_dimensions(project_id, url)
    # An empty listing has no first row to take column names from; show an empty table instead
    # of raising IndexError (this also clears a previously displayed project).
    columns = [{"name": key, "id": key} for key in table[0]] if table else []
    return dash_table.DataTable(
        table,
        columns,
        id="dimensions_table",
        editable=False,
        filter_action="native",
        sort_action="native",
        row_selectable="single",
        selected_rows=[],
        style_table=STYLE_TABLE,
        style_header=STYLE_HEADER,
        style_cell=STYLE_CELL,
        style_data_conditional=STYLE_DATA_CONDITIONAL,
    )
151
+
152
+
153
@app.callback(
    Output("dimension_records_table_container", "children"),
    Output("dimension_name", "value"),
    Input("dimensions_table", "derived_viewport_selected_rows"),
    Input("dimensions_table", "derived_viewport_data"),
    State("url_text", "value"),
)
def on_list_dimension_records(row_indexes, row_data, url):
    """Show the records of the dimension selected in the dimensions table.

    Parameters
    ----------
    row_indexes
        Selected row indexes of the dimensions table viewport; only the first one is used.
    row_data
        Rows of the dimensions table viewport; the selected row must provide the
        "dimension_id" and "name" keys.
    url
        Base URL of the dsgrid API, taken from the URL input box.

    Returns
    -------
    tuple
        The records table and the name of the selected dimension.

    Raises
    ------
    PreventUpdate
        If no row is selected or the dimension has no records.
    """
    if not row_indexes:
        raise PreventUpdate

    selected_row = row_data[row_indexes[0]]
    records = list_dimension_records(selected_row["dimension_id"], url)
    if not records:
        raise PreventUpdate

    # The DataFrame is only used to count distinct values per column for the header labels.
    frame = pd.DataFrame.from_records(records)
    columns = [
        {"name": f"{field} ({frame[field].nunique()} unique)", "id": field}
        for field in records[0].keys()
    ]

    records_table = dash_table.DataTable(
        records,
        columns,
        id="dimension_records_table",
        editable=False,
        filter_action="native",
        sort_action="native",
        style_table=STYLE_TABLE,
        style_header=STYLE_HEADER,
        style_cell=STYLE_CELL,
        style_data_conditional=STYLE_DATA_CONDITIONAL,
    )
    return records_table, selected_row["name"]
191
+
192
+
193
def list_project_ids(url):
    """Return the project IDs reported by the "projects" endpoint of the API at ``url``.

    The JSON payload is validated by constructing a ListProjectsResponse from it.
    """
    payload = check_request("projects", url)
    projects = ListProjectsResponse(**payload).projects
    return [project.project_id for project in projects]
196
+
197
+
198
def list_project_dimensions(project_id, url):
    """Return the "dimensions" entry of the API response for project ``project_id``."""
    payload = check_request(f"projects/{project_id}/dimensions", url)
    return payload["dimensions"]
200
+
201
+
202
def list_dimension_records(dimension_id, url):
    """Return the "records" entry of the API response for dimension ``dimension_id``."""
    payload = check_request(f"dimensions/records/{dimension_id}", url)
    return payload["records"]
204
+
205
+
206
def check_request(endpoint, url, timeout=30):
    """Send a GET request to ``{url}/{endpoint}`` and return the decoded JSON body.

    Parameters
    ----------
    endpoint
        Path below the API base URL, without a leading slash (e.g. "projects").
    url
        Base URL of the dsgrid API. A trailing slash is tolerated.
    timeout
        Seconds to wait for the server before giving up. Without a timeout, requests waits
        indefinitely, which would hang the calling Dash callback on an unresponsive server.

    Returns
    -------
    The JSON-decoded response body.

    Raises
    ------
    RuntimeError
        If the server answers with a status code other than 200.
    requests.RequestException
        Raised by requests on connection errors or timeouts.
    """
    # Avoid "host//endpoint" when the user typed the base URL with a trailing slash.
    target = f"{url.rstrip('/')}/{endpoint}"
    response = requests.get(target, timeout=timeout)
    if response.status_code != 200:
        msg = f"request to {target} failed: {response.status_code}"
        raise RuntimeError(msg)
    return response.json()
213
+
214
+
215
# Start the app only when this module is executed as a script; debug mode is requested.
if __name__ == "__main__":
    app.run(debug=True)