dsgrid-toolkit 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. dsgrid_toolkit-0.3.0/LICENSE +29 -0
  2. dsgrid_toolkit-0.3.0/PKG-INFO +193 -0
  3. dsgrid_toolkit-0.3.0/README.md +123 -0
  4. dsgrid_toolkit-0.3.0/dsgrid/__init__.py +22 -0
  5. dsgrid_toolkit-0.3.0/dsgrid/api/__init__.py +0 -0
  6. dsgrid_toolkit-0.3.0/dsgrid/api/api_manager.py +179 -0
  7. dsgrid_toolkit-0.3.0/dsgrid/api/app.py +419 -0
  8. dsgrid_toolkit-0.3.0/dsgrid/api/models.py +60 -0
  9. dsgrid_toolkit-0.3.0/dsgrid/api/response_models.py +116 -0
  10. dsgrid_toolkit-0.3.0/dsgrid/apps/__init__.py +0 -0
  11. dsgrid_toolkit-0.3.0/dsgrid/apps/project_viewer/app.py +216 -0
  12. dsgrid_toolkit-0.3.0/dsgrid/apps/registration_gui.py +444 -0
  13. dsgrid_toolkit-0.3.0/dsgrid/chronify.py +32 -0
  14. dsgrid_toolkit-0.3.0/dsgrid/cli/__init__.py +0 -0
  15. dsgrid_toolkit-0.3.0/dsgrid/cli/common.py +120 -0
  16. dsgrid_toolkit-0.3.0/dsgrid/cli/config.py +176 -0
  17. dsgrid_toolkit-0.3.0/dsgrid/cli/download.py +13 -0
  18. dsgrid_toolkit-0.3.0/dsgrid/cli/dsgrid.py +157 -0
  19. dsgrid_toolkit-0.3.0/dsgrid/cli/dsgrid_admin.py +92 -0
  20. dsgrid_toolkit-0.3.0/dsgrid/cli/install_notebooks.py +62 -0
  21. dsgrid_toolkit-0.3.0/dsgrid/cli/query.py +729 -0
  22. dsgrid_toolkit-0.3.0/dsgrid/cli/registry.py +1862 -0
  23. dsgrid_toolkit-0.3.0/dsgrid/cloud/__init__.py +0 -0
  24. dsgrid_toolkit-0.3.0/dsgrid/cloud/cloud_storage_interface.py +140 -0
  25. dsgrid_toolkit-0.3.0/dsgrid/cloud/factory.py +31 -0
  26. dsgrid_toolkit-0.3.0/dsgrid/cloud/fake_storage_interface.py +37 -0
  27. dsgrid_toolkit-0.3.0/dsgrid/cloud/s3_storage_interface.py +156 -0
  28. dsgrid_toolkit-0.3.0/dsgrid/common.py +36 -0
  29. dsgrid_toolkit-0.3.0/dsgrid/config/__init__.py +0 -0
  30. dsgrid_toolkit-0.3.0/dsgrid/config/annual_time_dimension_config.py +194 -0
  31. dsgrid_toolkit-0.3.0/dsgrid/config/common.py +142 -0
  32. dsgrid_toolkit-0.3.0/dsgrid/config/config_base.py +148 -0
  33. dsgrid_toolkit-0.3.0/dsgrid/config/dataset_config.py +907 -0
  34. dsgrid_toolkit-0.3.0/dsgrid/config/dataset_schema_handler_factory.py +46 -0
  35. dsgrid_toolkit-0.3.0/dsgrid/config/date_time_dimension_config.py +136 -0
  36. dsgrid_toolkit-0.3.0/dsgrid/config/dimension_config.py +54 -0
  37. dsgrid_toolkit-0.3.0/dsgrid/config/dimension_config_factory.py +65 -0
  38. dsgrid_toolkit-0.3.0/dsgrid/config/dimension_mapping_base.py +350 -0
  39. dsgrid_toolkit-0.3.0/dsgrid/config/dimension_mappings_config.py +48 -0
  40. dsgrid_toolkit-0.3.0/dsgrid/config/dimensions.py +1025 -0
  41. dsgrid_toolkit-0.3.0/dsgrid/config/dimensions_config.py +71 -0
  42. dsgrid_toolkit-0.3.0/dsgrid/config/file_schema.py +190 -0
  43. dsgrid_toolkit-0.3.0/dsgrid/config/index_time_dimension_config.py +80 -0
  44. dsgrid_toolkit-0.3.0/dsgrid/config/input_dataset_requirements.py +31 -0
  45. dsgrid_toolkit-0.3.0/dsgrid/config/mapping_tables.py +209 -0
  46. dsgrid_toolkit-0.3.0/dsgrid/config/noop_time_dimension_config.py +42 -0
  47. dsgrid_toolkit-0.3.0/dsgrid/config/project_config.py +1462 -0
  48. dsgrid_toolkit-0.3.0/dsgrid/config/registration_models.py +188 -0
  49. dsgrid_toolkit-0.3.0/dsgrid/config/representative_period_time_dimension_config.py +194 -0
  50. dsgrid_toolkit-0.3.0/dsgrid/config/simple_models.py +49 -0
  51. dsgrid_toolkit-0.3.0/dsgrid/config/supplemental_dimension.py +29 -0
  52. dsgrid_toolkit-0.3.0/dsgrid/config/time_dimension_base_config.py +192 -0
  53. dsgrid_toolkit-0.3.0/dsgrid/data_models.py +155 -0
  54. dsgrid_toolkit-0.3.0/dsgrid/dataset/__init__.py +0 -0
  55. dsgrid_toolkit-0.3.0/dsgrid/dataset/dataset.py +123 -0
  56. dsgrid_toolkit-0.3.0/dsgrid/dataset/dataset_expression_handler.py +86 -0
  57. dsgrid_toolkit-0.3.0/dsgrid/dataset/dataset_mapping_manager.py +121 -0
  58. dsgrid_toolkit-0.3.0/dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  59. dsgrid_toolkit-0.3.0/dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  60. dsgrid_toolkit-0.3.0/dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  61. dsgrid_toolkit-0.3.0/dsgrid/dataset/growth_rates.py +162 -0
  62. dsgrid_toolkit-0.3.0/dsgrid/dataset/models.py +51 -0
  63. dsgrid_toolkit-0.3.0/dsgrid/dataset/table_format_handler_base.py +257 -0
  64. dsgrid_toolkit-0.3.0/dsgrid/dataset/table_format_handler_factory.py +17 -0
  65. dsgrid_toolkit-0.3.0/dsgrid/dataset/unpivoted_table.py +121 -0
  66. dsgrid_toolkit-0.3.0/dsgrid/dimension/__init__.py +0 -0
  67. dsgrid_toolkit-0.3.0/dsgrid/dimension/base_models.py +230 -0
  68. dsgrid_toolkit-0.3.0/dsgrid/dimension/dimension_filters.py +308 -0
  69. dsgrid_toolkit-0.3.0/dsgrid/dimension/standard.py +252 -0
  70. dsgrid_toolkit-0.3.0/dsgrid/dimension/time.py +352 -0
  71. dsgrid_toolkit-0.3.0/dsgrid/dimension/time_utils.py +103 -0
  72. dsgrid_toolkit-0.3.0/dsgrid/dsgrid_rc.py +88 -0
  73. dsgrid_toolkit-0.3.0/dsgrid/exceptions.py +105 -0
  74. dsgrid_toolkit-0.3.0/dsgrid/filesystem/__init__.py +0 -0
  75. dsgrid_toolkit-0.3.0/dsgrid/filesystem/cloud_filesystem.py +32 -0
  76. dsgrid_toolkit-0.3.0/dsgrid/filesystem/factory.py +32 -0
  77. dsgrid_toolkit-0.3.0/dsgrid/filesystem/filesystem_interface.py +136 -0
  78. dsgrid_toolkit-0.3.0/dsgrid/filesystem/local_filesystem.py +74 -0
  79. dsgrid_toolkit-0.3.0/dsgrid/filesystem/s3_filesystem.py +118 -0
  80. dsgrid_toolkit-0.3.0/dsgrid/loggers.py +132 -0
  81. dsgrid_toolkit-0.3.0/dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  82. dsgrid_toolkit-0.3.0/dsgrid/notebooks/registration.ipynb +48 -0
  83. dsgrid_toolkit-0.3.0/dsgrid/notebooks/start_notebook.sh +11 -0
  84. dsgrid_toolkit-0.3.0/dsgrid/project.py +451 -0
  85. dsgrid_toolkit-0.3.0/dsgrid/query/__init__.py +0 -0
  86. dsgrid_toolkit-0.3.0/dsgrid/query/dataset_mapping_plan.py +142 -0
  87. dsgrid_toolkit-0.3.0/dsgrid/query/derived_dataset.py +388 -0
  88. dsgrid_toolkit-0.3.0/dsgrid/query/models.py +728 -0
  89. dsgrid_toolkit-0.3.0/dsgrid/query/query_context.py +287 -0
  90. dsgrid_toolkit-0.3.0/dsgrid/query/query_submitter.py +994 -0
  91. dsgrid_toolkit-0.3.0/dsgrid/query/report_factory.py +19 -0
  92. dsgrid_toolkit-0.3.0/dsgrid/query/report_peak_load.py +70 -0
  93. dsgrid_toolkit-0.3.0/dsgrid/query/reports_base.py +20 -0
  94. dsgrid_toolkit-0.3.0/dsgrid/registry/__init__.py +0 -0
  95. dsgrid_toolkit-0.3.0/dsgrid/registry/bulk_register.py +165 -0
  96. dsgrid_toolkit-0.3.0/dsgrid/registry/common.py +287 -0
  97. dsgrid_toolkit-0.3.0/dsgrid/registry/config_update_checker_base.py +63 -0
  98. dsgrid_toolkit-0.3.0/dsgrid/registry/data_store_factory.py +34 -0
  99. dsgrid_toolkit-0.3.0/dsgrid/registry/data_store_interface.py +74 -0
  100. dsgrid_toolkit-0.3.0/dsgrid/registry/dataset_config_generator.py +158 -0
  101. dsgrid_toolkit-0.3.0/dsgrid/registry/dataset_registry_manager.py +950 -0
  102. dsgrid_toolkit-0.3.0/dsgrid/registry/dataset_update_checker.py +16 -0
  103. dsgrid_toolkit-0.3.0/dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  104. dsgrid_toolkit-0.3.0/dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  105. dsgrid_toolkit-0.3.0/dsgrid/registry/dimension_registry_manager.py +413 -0
  106. dsgrid_toolkit-0.3.0/dsgrid/registry/dimension_update_checker.py +16 -0
  107. dsgrid_toolkit-0.3.0/dsgrid/registry/duckdb_data_store.py +207 -0
  108. dsgrid_toolkit-0.3.0/dsgrid/registry/filesystem_data_store.py +150 -0
  109. dsgrid_toolkit-0.3.0/dsgrid/registry/filter_registry_manager.py +123 -0
  110. dsgrid_toolkit-0.3.0/dsgrid/registry/project_config_generator.py +57 -0
  111. dsgrid_toolkit-0.3.0/dsgrid/registry/project_registry_manager.py +1623 -0
  112. dsgrid_toolkit-0.3.0/dsgrid/registry/project_update_checker.py +48 -0
  113. dsgrid_toolkit-0.3.0/dsgrid/registry/registration_context.py +223 -0
  114. dsgrid_toolkit-0.3.0/dsgrid/registry/registry_auto_updater.py +316 -0
  115. dsgrid_toolkit-0.3.0/dsgrid/registry/registry_database.py +667 -0
  116. dsgrid_toolkit-0.3.0/dsgrid/registry/registry_interface.py +446 -0
  117. dsgrid_toolkit-0.3.0/dsgrid/registry/registry_manager.py +558 -0
  118. dsgrid_toolkit-0.3.0/dsgrid/registry/registry_manager_base.py +367 -0
  119. dsgrid_toolkit-0.3.0/dsgrid/registry/versioning.py +92 -0
  120. dsgrid_toolkit-0.3.0/dsgrid/rust_ext/__init__.py +14 -0
  121. dsgrid_toolkit-0.3.0/dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  122. dsgrid_toolkit-0.3.0/dsgrid/spark/__init__.py +0 -0
  123. dsgrid_toolkit-0.3.0/dsgrid/spark/functions.py +589 -0
  124. dsgrid_toolkit-0.3.0/dsgrid/spark/types.py +110 -0
  125. dsgrid_toolkit-0.3.0/dsgrid/tests/__init__.py +0 -0
  126. dsgrid_toolkit-0.3.0/dsgrid/tests/common.py +140 -0
  127. dsgrid_toolkit-0.3.0/dsgrid/tests/make_us_data_registry.py +265 -0
  128. dsgrid_toolkit-0.3.0/dsgrid/tests/register_derived_datasets.py +103 -0
  129. dsgrid_toolkit-0.3.0/dsgrid/tests/utils.py +25 -0
  130. dsgrid_toolkit-0.3.0/dsgrid/time/__init__.py +0 -0
  131. dsgrid_toolkit-0.3.0/dsgrid/time/time_conversions.py +80 -0
  132. dsgrid_toolkit-0.3.0/dsgrid/time/types.py +67 -0
  133. dsgrid_toolkit-0.3.0/dsgrid/units/__init__.py +0 -0
  134. dsgrid_toolkit-0.3.0/dsgrid/units/constants.py +113 -0
  135. dsgrid_toolkit-0.3.0/dsgrid/units/convert.py +71 -0
  136. dsgrid_toolkit-0.3.0/dsgrid/units/energy.py +145 -0
  137. dsgrid_toolkit-0.3.0/dsgrid/units/power.py +87 -0
  138. dsgrid_toolkit-0.3.0/dsgrid/utils/__init__.py +0 -0
  139. dsgrid_toolkit-0.3.0/dsgrid/utils/dataset.py +830 -0
  140. dsgrid_toolkit-0.3.0/dsgrid/utils/files.py +179 -0
  141. dsgrid_toolkit-0.3.0/dsgrid/utils/filters.py +125 -0
  142. dsgrid_toolkit-0.3.0/dsgrid/utils/id_remappings.py +100 -0
  143. dsgrid_toolkit-0.3.0/dsgrid/utils/py_expression_eval/LICENSE +19 -0
  144. dsgrid_toolkit-0.3.0/dsgrid/utils/py_expression_eval/README.md +8 -0
  145. dsgrid_toolkit-0.3.0/dsgrid/utils/py_expression_eval/__init__.py +847 -0
  146. dsgrid_toolkit-0.3.0/dsgrid/utils/py_expression_eval/tests.py +283 -0
  147. dsgrid_toolkit-0.3.0/dsgrid/utils/run_command.py +70 -0
  148. dsgrid_toolkit-0.3.0/dsgrid/utils/scratch_dir_context.py +65 -0
  149. dsgrid_toolkit-0.3.0/dsgrid/utils/spark.py +918 -0
  150. dsgrid_toolkit-0.3.0/dsgrid/utils/spark_partition.py +98 -0
  151. dsgrid_toolkit-0.3.0/dsgrid/utils/timing.py +239 -0
  152. dsgrid_toolkit-0.3.0/dsgrid/utils/utilities.py +221 -0
  153. dsgrid_toolkit-0.3.0/dsgrid/utils/versioning.py +36 -0
  154. dsgrid_toolkit-0.3.0/pyproject.toml +190 -0
  155. dsgrid_toolkit-0.3.0/rust/Cargo.lock +970 -0
  156. dsgrid_toolkit-0.3.0/rust/Cargo.toml +18 -0
  157. dsgrid_toolkit-0.3.0/rust/README.md +228 -0
  158. dsgrid_toolkit-0.3.0/rust/src/lib.rs +638 -0
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2021, Alliance for Sustainable Energy, LLC
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: dsgrid-toolkit
3
+ Version: 0.3.0
4
+ Classifier: Development Status :: 3 - Alpha
5
+ Classifier: Intended Audience :: Science/Research
6
+ Classifier: License :: OSI Approved :: BSD License
7
+ Classifier: Natural Language :: English
8
+ Classifier: Programming Language :: Python :: 3.11
9
+ Requires-Dist: chronify~=0.6.0
10
+ Requires-Dist: click>=8.2,<9
11
+ Requires-Dist: dash
12
+ Requires-Dist: dash-bootstrap-components
13
+ Requires-Dist: duckdb>=1,<2
14
+ Requires-Dist: fastapi
15
+ Requires-Dist: json5
16
+ Requires-Dist: networkx
17
+ Requires-Dist: pandas
18
+ Requires-Dist: prettytable
19
+ Requires-Dist: pyarrow
20
+ Requires-Dist: pydantic~=2.11.6
21
+ Requires-Dist: requests
22
+ Requires-Dist: rich-click
23
+ Requires-Dist: semver
24
+ Requires-Dist: sqlalchemy>=2,<3
25
+ Requires-Dist: uvicorn
26
+ Requires-Dist: tzdata
27
+ Requires-Dist: httpx ; extra == 'dev'
28
+ Requires-Dist: pytest ; extra == 'dev'
29
+ Requires-Dist: pytest-cov ; extra == 'dev'
30
+ Requires-Dist: pre-commit ; extra == 'dev'
31
+ Requires-Dist: devtools ; extra == 'dev'
32
+ Requires-Dist: flake8 ; extra == 'dev'
33
+ Requires-Dist: mypy ; extra == 'dev'
34
+ Requires-Dist: pyarrow ; extra == 'dev'
35
+ Requires-Dist: maturin ; extra == 'dev'
36
+ Requires-Dist: furo ; extra == 'doc'
37
+ Requires-Dist: ghp-import ; extra == 'doc'
38
+ Requires-Dist: numpydoc ; extra == 'doc'
39
+ Requires-Dist: pandas-stubs ; extra == 'doc'
40
+ Requires-Dist: ruff ; extra == 'doc'
41
+ Requires-Dist: sphinx~=7.2 ; extra == 'doc'
42
+ Requires-Dist: sphinx-click~=5.0 ; extra == 'doc'
43
+ Requires-Dist: sphinx-copybutton~=0.5.2 ; extra == 'doc'
44
+ Requires-Dist: sphinx-tabs~=3.4 ; extra == 'doc'
45
+ Requires-Dist: sphinx-argparse~=0.4.0 ; extra == 'doc'
46
+ Requires-Dist: sphinxcontrib-programoutput ; extra == 'doc'
47
+ Requires-Dist: autodoc-pydantic[erdantic]~=2.0 ; extra == 'doc'
48
+ Requires-Dist: twine ; extra == 'release'
49
+ Requires-Dist: setuptools ; extra == 'release'
50
+ Requires-Dist: wheel ; extra == 'release'
51
+ Requires-Dist: chronify[spark] ; extra == 'spark'
52
+ Requires-Dist: pyspark==4.0.0 ; extra == 'spark'
53
+ Requires-Dist: thrift ; extra == 'spark'
54
+ Requires-Dist: thrift-sasl ; extra == 'spark'
55
+ Provides-Extra: dev
56
+ Provides-Extra: doc
57
+ Provides-Extra: release
58
+ Provides-Extra: spark
59
+ License-File: LICENSE
60
+ Summary: Python API for accessing demand-side grid model (dsgrid) datasets
61
+ Keywords: dsgrid
62
+ Author-email: Elaine Hale <elaine.hale@nrel.gov>, Lixi Liu <lixi.liu@nrel.gov>, Meghan Mooney <meghan.mooney@nrel.gov>, Daniel Thom <daniel.thom@nrel.gov>
63
+ Maintainer-email: Elaine Hale <elaine.hale@nrel.gov>
64
+ Requires-Python: >=3.11
65
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
66
+ Project-URL: Documentation, https://dsgrid.github.io/dsgrid/
67
+ Project-URL: GitHub, https://github.com/dsgrid/dsgrid
68
+ Project-URL: Homepage, https://www.nrel.gov/analysis/dsgrid
69
+
70
+ # dsgrid
71
+ [![Documentation](https://img.shields.io/badge/docs-ready-blue.svg)](https://dsgrid.github.io/dsgrid)
72
+ [![codecov](https://codecov.io/gh/dsgrid/dsgrid/branch/main/graph/badge.svg?token=W0441C9XAL)](https://codecov.io/gh/dsgrid/dsgrid)
73
+
74
+ Python API for contributing to and accessing demand-side grid model (dsgrid) projects and datasets.
75
+
76
+ ⚠️ **dsgrid is under active development and does not yet have a formal package release.** Details listed here are subject to change. Please reach out to the dsgrid coordination team with any questions or other feedback. ⚠️
77
+
78
+ [Install](#install) | [Usage](#usage) | [Uninstall](#uninstall)
79
+
80
+ ## Install
81
+
82
+ [Virtual environment](#virtual-environment) | [Dependencies](#dependencies) | [from PIPY/pip](#from-pipypip) | [from pip+git](#from-pipgit) | [from cloned repository](#from-cloned-repository)
83
+
84
+ ### Virtual environment
85
+
86
+ Create a virtual environment in which to install dsgrid. Anaconda or miniconda is recommended.
87
+
88
+ ```
89
+ conda create -n dsgrid python=3.11
90
+ conda activate dsgrid
91
+ ```
92
+
93
+ ### Dependencies
94
+
95
+ dsgrid uses [Apache Spark](https://spark.apache.org/) to manage big data. There are no separate installation steps for Apache Spark beyond installing the dsgrid package with the `spark` extra:
96
+
97
+ ```
98
+ pip install "dsgrid-toolkit[spark]"
99
+ ```
100
+
101
+ Otherwise installing the pyspark Python dependency handles it.
102
+
103
+ However, you should be aware that Apache Spark's Microsoft Windows support is poor and essentially limited to local mode. That is, if you use dsgrid on a Windows machine you should not attempt to install a full version of Spark nor expect to run on a Spark cluster. As such, we recommend limiting dsgrid use on Windows to browsing the registry, registering and submitting small- to medium-sized datasets, or development work with small test projects. Full dsgrid functionality with large projects requires additional computational resources, e.g., high performance or cloud computing, typically on a Linux operating system.
104
+
105
+ #### Additional Notes
106
+ - If pyspark complains about not finding Python, you may need to locate your python executable file (python.exe on Windows), copy it, and rename the copy to python3 (python3.exe on Windows)
107
+
108
+ Spark requires Java 8 or later with the `JAVA_HOME` environment variable set to the Java installation directory.
109
+
110
+ On Linux you can install OpenJDK with conda:
111
+ ```
112
+ conda install openjdk
113
+ ```
114
+
115
+ Windows install instructions are below.
116
+
117
+ #### Windows
118
+
119
+ To install Apache Spark on Windows, follow [these instructions](https://towardsdatascience.com/installing-apache-pyspark-on-windows-10-f5f0c506bea1).
120
+
121
+ ### From PIPY/pip
122
+
123
+ pip install dsgrid-toolkit
124
+
125
+ or
126
+
127
+ pip install "dsgrid-toolkit[spark]"
128
+
129
+ ### From pip+git
130
+
131
+ **With ssh keys:**
132
+ ```
133
+ pip install git+ssh://git@github.com/dsgrid/dsgrid.git@main
134
+
135
+ # or
136
+
137
+ pip install git+ssh://git@github.com/dsgrid/dsgrid.git@develop
138
+ ```
139
+
140
+ **From http:**
141
+ ```
142
+ pip install git+https://github.com/dsgrid/dsgrid.git@main
143
+
144
+ # or
145
+
146
+ pip install git+https://github.com/dsgrid/dsgrid.git@develop
147
+ ```
148
+
149
+ ### From Cloned Repository
150
+
151
+ First, clone the repository and change into the `dsgrid` directory. For example:
152
+
153
+ ```
154
+ cd ~ # or other directory where you put repositories
155
+ git clone git@github.com:dsgrid/dsgrid.git # or the http address
156
+ cd dsgrid
157
+ ```
158
+
159
+ Then install the package using the pip `-e` flag to directly use the files in the
160
+ cloned repository.
161
+
162
+ **Users:**
163
+ ```
164
+ pip install -e .
165
+ ```
166
+
167
+ **Developers:**
168
+ ```
169
+ pip install -e '.[dev,spark]'
170
+ ```
171
+
172
+ ## Usage
173
+
174
+ dsgrid is primarily a command-line interface (CLI) tool. To see the available commands:
175
+ ```
176
+ dsgrid --help
177
+ ```
178
+
179
+ ## Uninstall
180
+
181
+ ```
182
+ pip uninstall dsgrid-toolkit
183
+ ```
184
+
185
+ If you are using a conda environment
186
+ ```
187
+ conda deactivate
188
+ ```
189
+
190
+ ## Software Record
191
+
192
+ dsgrid is developed under NREL Software Record SWR-21-52, "demand-side grid model".
193
+
@@ -0,0 +1,123 @@
1
+ # dsgrid
2
+ [![Documentation](https://img.shields.io/badge/docs-ready-blue.svg)](https://dsgrid.github.io/dsgrid)
3
+ [![codecov](https://codecov.io/gh/dsgrid/dsgrid/branch/main/graph/badge.svg?token=W0441C9XAL)](https://codecov.io/gh/dsgrid/dsgrid)
4
+
5
+ Python API for contributing to and accessing demand-side grid model (dsgrid) projects and datasets.
6
+
7
+ ⚠️ **dsgrid is under active development and does not yet have a formal package release.** Details listed here are subject to change. Please reach out to the dsgrid coordination team with any questions or other feedback. ⚠️
8
+
9
+ [Install](#install) | [Usage](#usage) | [Uninstall](#uninstall)
10
+
11
+ ## Install
12
+
13
+ [Virtual environment](#virtual-environment) | [Dependencies](#dependencies) | [from PIPY/pip](#from-pipypip) | [from pip+git](#from-pipgit) | [from cloned repository](#from-cloned-repository)
14
+
15
+ ### Virtual environment
16
+
17
+ Create a virtual environment in which to install dsgrid. Anaconda or miniconda is recommended.
18
+
19
+ ```
20
+ conda create -n dsgrid python=3.11
21
+ conda activate dsgrid
22
+ ```
23
+
24
+ ### Dependencies
25
+
26
+ dsgrid uses [Apache Spark](https://spark.apache.org/) to manage big data. There are no separate installation steps for Apache Spark beyond installing the dsgrid package with the `spark` extra:
27
+
28
+ ```
29
+ pip install "dsgrid-toolkit[spark]"
30
+ ```
31
+
32
+ Otherwise installing the pyspark Python dependency handles it.
33
+
34
+ However, you should be aware that Apache Spark's Microsoft Windows support is poor and essentially limited to local mode. That is, if you use dsgrid on a Windows machine you should not attempt to install a full version of Spark nor expect to run on a Spark cluster. As such, we recommend limiting dsgrid use on Windows to browsing the registry, registering and submitting small- to medium-sized datasets, or development work with small test projects. Full dsgrid functionality with large projects requires additional computational resources, e.g., high performance or cloud computing, typically on a Linux operating system.
35
+
36
+ #### Additional Notes
37
+ - If pyspark complains about not finding Python, you may need to locate your python executable file (python.exe on Windows), copy it, and rename the copy to python3 (python3.exe on Windows)
38
+
39
+ Spark requires Java 8 or later with the `JAVA_HOME` environment variable set to the Java installation directory.
40
+
41
+ On Linux you can install OpenJDK with conda:
42
+ ```
43
+ conda install openjdk
44
+ ```
45
+
46
+ Windows install instructions are below.
47
+
48
+ #### Windows
49
+
50
+ To install Apache Spark on Windows, follow [these instructions](https://towardsdatascience.com/installing-apache-pyspark-on-windows-10-f5f0c506bea1).
51
+
52
+ ### From PIPY/pip
53
+
54
+ pip install dsgrid-toolkit
55
+
56
+ or
57
+
58
+ pip install "dsgrid-toolkit[spark]"
59
+
60
+ ### From pip+git
61
+
62
+ **With ssh keys:**
63
+ ```
64
+ pip install git+ssh://git@github.com/dsgrid/dsgrid.git@main
65
+
66
+ # or
67
+
68
+ pip install git+ssh://git@github.com/dsgrid/dsgrid.git@develop
69
+ ```
70
+
71
+ **From http:**
72
+ ```
73
+ pip install git+https://github.com/dsgrid/dsgrid.git@main
74
+
75
+ # or
76
+
77
+ pip install git+https://github.com/dsgrid/dsgrid.git@develop
78
+ ```
79
+
80
+ ### From Cloned Repository
81
+
82
+ First, clone the repository and change into the `dsgrid` directory. For example:
83
+
84
+ ```
85
+ cd ~ # or other directory where you put repositories
86
+ git clone git@github.com:dsgrid/dsgrid.git # or the http address
87
+ cd dsgrid
88
+ ```
89
+
90
+ Then install the package using the pip `-e` flag to directly use the files in the
91
+ cloned repository.
92
+
93
+ **Users:**
94
+ ```
95
+ pip install -e .
96
+ ```
97
+
98
+ **Developers:**
99
+ ```
100
+ pip install -e '.[dev,spark]'
101
+ ```
102
+
103
+ ## Usage
104
+
105
+ dsgrid is primarily a command-line interface (CLI) tool. To see the available commands:
106
+ ```
107
+ dsgrid --help
108
+ ```
109
+
110
+ ## Uninstall
111
+
112
+ ```
113
+ pip uninstall dsgrid-toolkit
114
+ ```
115
+
116
+ If you are using a conda environment
117
+ ```
118
+ conda deactivate
119
+ ```
120
+
121
+ ## Software Record
122
+
123
+ dsgrid is developed under NREL Software Record SWR-21-52, "demand-side grid model".
@@ -0,0 +1,22 @@
1
+ import datetime as dt
2
+ import warnings
3
+
4
+ from dsgrid.dsgrid_rc import DsgridRuntimeConfig
5
+ from dsgrid.utils.timing import timer_stats_collector # noqa: F401
6
+
7
+ __title__ = "dsgrid"
8
+ __description__ = (
9
+ "Python API for registring and accessing demand-side grid model (dsgrid) datasets"
10
+ )
11
+ __url__ = "https://github.com/dsgrid/dsgrid"
12
+ __version__ = "0.3.0"
13
+ __author__ = "NREL"
14
+ __maintainer_email__ = "elaine.hale@nrel.gov"
15
+ __license__ = "BSD-3"
16
+ __copyright__ = "Copyright {}, The Alliance for Sustainable Energy, LLC".format(
17
+ dt.date.today().year
18
+ )
19
+
20
+ warnings.filterwarnings("ignore", module="duckdb_engine")
21
+
22
+ runtime_config = DsgridRuntimeConfig.load()
File without changes
@@ -0,0 +1,179 @@
1
+ import logging
2
+ import threading
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ from dsgrid.exceptions import DSGValueNotStored
7
+ from dsgrid.registry.registry_manager import RegistryManager
8
+ from dsgrid.utils.files import load_data
9
+ from .models import StoreModel, AsyncTaskModel, AsyncTaskStatus, AsyncTaskType
10
+
11
+
12
# Default limit on the number of async tasks that may be outstanding at once.
MAX_CONCURRENT_ASYNC_TASKS = 4

logger = logging.getLogger(__name__)


class ApiManager:
    """Manages API requests.

    Tracks asynchronous tasks in a ``Store`` that is persisted after every change and
    caches projects loaded through the registry manager. Every method that touches the
    store or the project cache holds ``self._lock`` while doing so.
    """

    def __init__(
        self,
        home_dir: str | Path,
        registry_manager: RegistryManager,
        max_concurrent_async_tasks=MAX_CONCURRENT_ASYNC_TASKS,
    ):
        """Create the manager.

        Parameters
        ----------
        home_dir : str | Path
            Directory passed to ``Store.load`` to locate the store file.
        registry_manager : RegistryManager
            Used to load projects in :meth:`get_project`.
        max_concurrent_async_tasks : int
            Maximum number of async tasks that may be outstanding at the same time.

        """
        self._home_dir = Path(home_dir)
        self._store = Store.load(self._home_dir)
        # Re-entrant: initialize_async_task() calls _get_next_async_task_id() while
        # already holding the lock.
        self._lock = threading.RLock()
        self._max_concurrent_async_tasks = max_concurrent_async_tasks
        self._cached_projects = {}
        self._registry_mgr = registry_manager

    def can_start_new_async_task(self):
        """Return True if fewer than the maximum number of async tasks are outstanding."""
        with self._lock:
            return len(self._store.data.outstanding_async_tasks) < self._max_concurrent_async_tasks

    def initialize_async_task(self, task_type: AsyncTaskType) -> int:
        """Record a new in-progress async task, persist the store, and return its ID.

        Parameters
        ----------
        task_type : AsyncTaskType
            Type stored on the new task.

        Returns
        -------
        int
            ID assigned to the new task.

        Raises
        ------
        Exception
            Raised if the maximum number of async tasks is already outstanding.

        """
        with self._lock:
            num_outstanding = len(self._store.data.outstanding_async_tasks)
            # TODO: implement queueing so that we don't return an error
            # ">=" keeps this check in agreement with can_start_new_async_task(); a
            # plain ">" would admit one task more than the configured maximum.
            if num_outstanding >= self._max_concurrent_async_tasks:
                msg = f"Too many async tasks are already running: {num_outstanding}"
                raise Exception(msg)
            async_task_id = self._get_next_async_task_id()
            task = AsyncTaskModel(
                async_task_id=async_task_id,
                task_type=task_type,
                status=AsyncTaskStatus.IN_PROGRESS,
                start_time=datetime.now(),
            )
            self._store.data.async_tasks[async_task_id] = task
            self._store.data.outstanding_async_tasks.add(async_task_id)
            self._store.persist()

        logger.info("Initialized async_task_id=%s", async_task_id)
        return async_task_id

    def clear_completed_async_tasks(self):
        """Remove every task whose status is COMPLETE and persist the store."""
        with self._lock:
            # async_tasks maps task ID -> task, so iterate items(); iterating the
            # mapping itself only yields the IDs, which have no status attribute.
            to_remove = [
                task_id
                for task_id, task in self._store.data.async_tasks.items()
                if task.status == AsyncTaskStatus.COMPLETE
            ]
            for async_task_id in to_remove:
                self._store.data.async_tasks.pop(async_task_id)
            self._store.persist()
            logger.info("Cleared %d completed tasks", len(to_remove))

    def get_async_task_status(self, async_task_id):
        """Return the stored task for the async ID.

        Raises
        ------
        KeyError
            Raised if async_task_id is not stored.

        """
        with self._lock:
            return self._store.data.async_tasks[async_task_id]

    def complete_async_task(self, async_task_id, return_code: int, result=None):
        """Complete an asynchronous operation.

        Marks the task COMPLETE, records the return code and completion time, removes
        the ID from the outstanding set, and persists the store.

        Parameters
        ----------
        async_task_id
            ID of the task to complete.
        return_code : int
            Return code stored on the task.
        result
            If not None, stored as the task's result.

        Raises
        ------
        KeyError
            Raised if async_task_id is not stored or is not outstanding.

        """
        with self._lock:
            task = self._store.data.async_tasks[async_task_id]
            task.status = AsyncTaskStatus.COMPLETE
            task.return_code = return_code
            task.completion_time = datetime.now()
            self._store.data.outstanding_async_tasks.remove(async_task_id)
            if result is not None:
                task.result = result
            self._store.persist()

        logger.info("Completed async_task_id=%s", async_task_id)

    def list_async_tasks(self, async_task_ids=None, status=None) -> list[AsyncTaskModel]:
        """Return async tasks.

        Parameters
        ----------
        async_task_ids : list | None
            IDs of tasks for which to return status. If not set, return all statuses.
        status : AsyncTaskStatus | None
            If set, filter tasks by this status.

        Returns
        -------
        list[AsyncTaskModel]

        Raises
        ------
        DSGValueNotStored
            Raised if any of async_task_ids is not stored.

        """
        with self._lock:
            stored_tasks = self._store.data.async_tasks
            if async_task_ids is not None:
                diff = set(async_task_ids).difference(stored_tasks.keys())
                if diff:
                    msg = f"async_task_ids={diff} are not stored"
                    raise DSGValueNotStored(msg)
            tasks = stored_tasks.keys() if async_task_ids is None else async_task_ids
            return [
                stored_tasks[x] for x in tasks if status is None or stored_tasks[x].status == status
            ]

    def _get_next_async_task_id(self) -> int:
        """Return the next unused task ID, advance the counter, and persist the store."""
        with self._lock:
            next_id = self._store.data.next_async_task_id
            self._store.data.next_async_task_id += 1
            self._store.persist()

        return next_id

    def get_project(self, project_id):
        """Load a Project and cache it for future calls.
        Loading is slow and the Project isn't being changed by this API.
        """
        with self._lock:
            project = self._cached_projects.get(project_id)
            if project is not None:
                return project
            project = self._registry_mgr.project_manager.load_project(project_id)
            self._cached_projects[project_id] = project
            return project
158
+
159
+
160
class Store:
    """File-backed holder for the API server's ``StoreModel`` data."""

    STORE_FILENAME = "api_server_store.json"

    def __init__(self, store_file: Path, data: StoreModel):
        self.data = data
        self._store_file = store_file

    @classmethod
    def load(cls, path: Path):
        """Build a Store for the store file located in directory ``path``.

        If the file exists, its contents are read with ``load_data`` and used to
        construct the model; otherwise the store starts from a default ``StoreModel``.
        Nothing is written to disk here.
        """
        # TODO: use MongoDB or some other db
        store_file = path / cls.STORE_FILENAME
        if not store_file.exists():
            logger.info("Create new store: %s", store_file)
            return cls(store_file, StoreModel())

        logger.info("Load from existing store: %s", store_file)
        contents = load_data(store_file)
        model = StoreModel(**contents)
        return cls(store_file, model)

    def persist(self):
        """Write the current data to the store file as indented JSON."""
        serialized = self.data.model_dump_json(indent=2)
        self._store_file.write_text(serialized)