dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,142 @@
1
+ from typing import Any, Iterable
2
+
3
+ from dsgrid.dimension.base_models import DimensionType
4
+ from dsgrid.dimension.time import MeasurementType, TimeDimensionType, TimeIntervalType
5
+ from dsgrid.dimension.standard import (
6
+ EnergyEfficiency,
7
+ EnergyEndUse,
8
+ EnergyIntensity,
9
+ EnergyIntensityRegression,
10
+ EnergyServiceDemand,
11
+ EnergyServiceDemandRegression,
12
+ FractionalIndex,
13
+ PeggedIndex,
14
+ Population,
15
+ Stock,
16
+ StockShare,
17
+ StockRegression,
18
+ WeatherVariable,
19
+ )
20
+ from dsgrid.exceptions import DSGInvalidParameter
21
+
22
+
23
# Class names of the metric dimension classes that make_base_dimension_template accepts
# in its metric_types argument.
SUPPORTED_METRIC_TYPES = {
    metric_class.__name__
    for metric_class in (
        EnergyEfficiency,
        EnergyEndUse,
        EnergyIntensity,
        EnergyIntensityRegression,
        EnergyServiceDemand,
        EnergyServiceDemandRegression,
        FractionalIndex,
        PeggedIndex,
        Population,
        Stock,
        StockShare,
        StockRegression,
        WeatherVariable,
    )
}

# Value written to the "class" field of a dimension template for each dimension type.
# DimensionType.METRIC is intentionally absent: metric templates take their class name
# from the caller-supplied metric type instead.
DIMENSION_CLASS_MAP: dict[DimensionType, str] = {
    DimensionType.GEOGRAPHY: "Geography",
    DimensionType.MODEL_YEAR: "ModelYear",
    DimensionType.SCENARIO: "Scenario",
    DimensionType.SECTOR: "Sector",
    DimensionType.SUBSECTOR: "Subsector",
    DimensionType.TIME: "Time",
    DimensionType.WEATHER_YEAR: "WeatherYear",
}
51
+
52
+
53
def make_base_dimension_template(
    metric_types: Iterable[str],
    exclude_dimension_types: set[DimensionType] | None = None,
    time_type: TimeDimensionType | None = None,
) -> list[dict[str, Any]]:
    """Build a list of dimension template dictionaries.

    The result contains one metric entry per item in ``metric_types``, followed by one
    entry for every non-excluded dimension type, followed by an optional time entry.

    Parameters
    ----------
    metric_types : Iterable[str]
        Class names of metric dimensions. Each must be in SUPPORTED_METRIC_TYPES.
    exclude_dimension_types : set[DimensionType] | None
        Dimension types to leave out of the template. The set is not modified.
    time_type : TimeDimensionType | None
        If not None, append a time dimension template of this type.

    Returns
    -------
    list[dict[str, Any]]

    Raises
    ------
    DSGInvalidParameter
        Raised if a metric type is not in SUPPORTED_METRIC_TYPES.

    """
    # Work on a copy so that the caller's set is never mutated.
    exclude: set[DimensionType] = set(exclude_dimension_types or ())
    # Metric and time dimensions are generated separately (above and below the generic
    # entries), so they are always skipped in the generic pass.
    exclude.update({DimensionType.METRIC, DimensionType.TIME})

    dimensions: list[dict[str, Any]] = []
    for metric_type in metric_types:
        if metric_type not in SUPPORTED_METRIC_TYPES:
            msg = f"{metric_type=} is not one of the {SUPPORTED_METRIC_TYPES=}"
            raise DSGInvalidParameter(msg)
        dim = {
            "type": DimensionType.METRIC.value,
            "class": metric_type,
            "name": DimensionType.METRIC.value,
            "description": DimensionType.METRIC.value,
            "file": f"dimensions/{metric_type}.csv",
            "module": "dsgrid.dimension.standard",
        }
        dimensions.append(dim)

    dimensions += [
        {
            "type": x.value,
            "class": DIMENSION_CLASS_MAP[x],
            "name": x.value,
            "description": x.value,
            "file": f"dimensions/{x.value}.csv",
            "module": "dsgrid.dimension.standard",
        }
        for x in DimensionType
        if x not in exclude
    ]
    if time_type is not None:
        time_dim = make_base_time_dimension_template(time_type)
        dimensions.append(time_dim)

    return dimensions
93
+
94
+
95
def make_base_time_dimension_template(time_type: TimeDimensionType) -> dict[str, Any]:
    """Build a template dictionary for a time dimension.

    Every template gets the common fields. DATETIME, ANNUAL, and INDEX time types
    additionally get a class name and a sample range; any other time type is returned
    with the common fields only.

    Parameters
    ----------
    time_type : TimeDimensionType

    Returns
    -------
    dict[str, Any]

    """
    label = time_type.value
    template: dict[str, Any] = {
        "type": DimensionType.TIME.value,
        "time_type": label,
        "time_interval_type": TimeIntervalType.PERIOD_BEGINNING.value,
        "name": label,
        "description": label,
        "module": "dsgrid.dimension.standard",
    }

    if time_type == TimeDimensionType.DATETIME:
        hourly_range = {
            "start": "2018-01-01 00:00:00",
            "end": "2018-12-31 23:00:00",
            "frequency": "P0DT1H",
            "str_format": "%Y-%m-%d %H:%M:%S",
        }
        template.update(
            {
                "class": "Time",
                "time_zone_format": {
                    "format_type": "aligned_in_absolute_time",
                    "time_zone": "Etc/GMT+5",
                },
                "measurement_type": MeasurementType.TOTAL.value,
                "ranges": [hourly_range],
            }
        )
    elif time_type == TimeDimensionType.ANNUAL:
        annual_range = {
            "start": "2010",
            "end": "2024",
            "frequency": 1,
            "str_format": "%Y",
        }
        template.update(
            {
                "class": "AnnualTime",
                "include_leap_day": True,
                "ranges": [annual_range],
            }
        )
    elif time_type == TimeDimensionType.INDEX:
        index_range = {
            "start": 0,
            "end": 8759,
            "frequency": "P0DT1H",
            "starting_timestamp": "2018-01-01 00:00:00",
            "str_format": "%Y-%m-%d %H:%M:%S",
        }
        template.update(
            {
                "class": "IndexTime",
                "ranges": [index_range],
            }
        )

    return template
@@ -0,0 +1,148 @@
1
+ import abc
2
+ import logging
3
+ from pathlib import Path
4
+ from typing import Type
5
+
6
+ import json5
7
+
8
+ from dsgrid.exceptions import DSGInvalidOperation
9
+ from dsgrid.spark.types import (
10
+ DataFrame,
11
+ )
12
+ from dsgrid.utils.spark import models_to_dataframe
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class ConfigBase(abc.ABC):
    """Abstract base for all config classes; wraps a data model instance."""

    def __init__(self, model):
        self._model = model

    @classmethod
    def load(cls, config_file, *args, **kwargs):
        """Create a config from a file.

        Parameters
        ----------
        config_file : str

        Returns
        -------
        ConfigBase

        """
        # Subclasses that need more arguments can override this method.
        return cls._load(config_file, *args, **kwargs)

    @classmethod
    def load_from_model(cls, model):
        """Create a config from an already-constructed model.

        Parameters
        ----------
        model : DSGBaseModel

        Returns
        -------
        ConfigBase

        """
        return cls(model)

    @classmethod
    def _load(cls, config_file):
        # The backing model class knows how to parse the file.
        model_type = cls.model_class()
        return cls(model_type.load(config_file))

    @staticmethod
    @abc.abstractmethod
    def config_filename() -> str:
        """Return the name of the config file.

        Returns
        -------
        str

        """

    @property
    @abc.abstractmethod
    def config_id(self) -> str:
        """Return the ID of this configuration.

        Returns
        -------
        str

        """

    @property
    def model(self):
        """Return the data model wrapped by this config.

        Returns
        -------
        DSGBaseModel

        """
        return self._model

    @staticmethod
    @abc.abstractmethod
    def model_class() -> Type:
        """Return the data model class that backs the config."""

    def serialize(self, path, force=False):
        """Write the model as JSON into a directory and return the file path.

        Parameters
        ----------
        path : str
            Directory in which to write the file named by config_filename().
        force : bool
            If True, overwrite an existing file.

        Returns
        -------
        Path

        Raises
        ------
        DSGInvalidOperation
            Raised if the file exists and force is False.

        """
        target = Path(path) / self.config_filename()
        if target.exists() and not force:
            msg = f"{target} exists. Set force=True to overwrite."
            raise DSGInvalidOperation(msg)

        payload = self.model.model_dump_json(indent=2)
        target.write_text(payload)
        return target
113
+
114
+
115
class ConfigWithRecordFileBase(ConfigBase, abc.ABC):
    """Intermediate-level base class to provide serialization of record files."""

    def get_records_dataframe(self) -> DataFrame:
        """Return the model's records as a Spark dataframe.

        NOTE(review): the dataframe is built by models_to_dataframe with a per-instance
        table name; any caching presumably happens inside that helper — confirm.

        Returns
        -------
        DataFrame

        """
        # id provides uniqueness and the config_id could help inspect what's in cache in case we
        # ever need that.
        # Spark doesn't allow dashes in the table name.
        table_name = f"{self.config_id}__{id(self)}".replace("-", "_")
        df = models_to_dataframe(self.model.records, table_name=table_name)
        logger.debug("Loaded %s records dataframe", self.config_id)
        return df

    @classmethod
    def load(cls, config_file):
        """Load the config from a file.

        Parameters
        ----------
        config_file : str

        Returns
        -------
        ConfigWithRecordFileBase

        """
        return super().load(config_file)

    def serialize(self, path, force=False):
        """Write the config file and a records.csv file into a directory.

        Parameters
        ----------
        path : str | Path
            Directory in which to write the files.
        force : bool
            If True, overwrite existing files.

        Returns
        -------
        Path
            Path to the written config file.

        Raises
        ------
        DSGInvalidOperation
            Raised if either output file exists and force is False.

        """
        # Accept str as well as Path, matching ConfigBase.serialize.
        directory = Path(path)
        dst_config_file = directory / self.config_filename()
        records_file = directory / "records.csv"
        # Check both outputs before writing anything so a refusal leaves no partial output.
        for filename in (dst_config_file, records_file):
            if filename.exists() and not force:
                msg = f"{filename} exists. Set force=True to overwrite."
                raise DSGInvalidOperation(msg)

        self.get_records_dataframe().toPandas().to_csv(records_file, index=False)
        model_data = self.model.serialize()
        # Point the config at the records file relative to its own directory.
        model_data["file"] = records_file.name
        dst_config_file.write_text(json5.dumps(model_data, indent=2))
        return dst_config_file