dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
dsgrid/utils/spark_partition.py ADDED
@@ -0,0 +1,98 @@
+ import logging
+ import math
+
+ from dsgrid.utils.timing import timed_info
+
+ logger = logging.getLogger(__name__)
+
+
+ class SparkPartition:
+     def __init__(self):
+         return
+
+     def get_data_size(self, df, bytes_per_cell=8):
+         """Approximate the dataset size.
+
+         Parameters
+         ----------
+         df : DataFrame
+         bytes_per_cell : float | int
+             Estimated number of bytes per cell in a dataframe.
+             * 4 bytes = 32-bit = single-precision float = pyspark.sql.types.FloatType
+             * 8 bytes = 64-bit = double-precision float = pyspark.sql.types.DoubleType
+
+         Returns
+         -------
+         n_rows : int
+             Number of rows in df
+         n_cols : int
+             Number of columns in df
+         data_MB : float
+             Estimated size of df in memory in MB
+
+         """
+         n_rows = df.count()
+         n_cols = len(df.columns)
+         data_MB = n_rows * n_cols * bytes_per_cell / 1e6  # MB
+         return n_rows, n_cols, data_MB
+
+     @timed_info
+     def get_optimal_number_of_files(self, df, MB_per_cmp_file=128, cmp_ratio=0.18):
+         """Calculate the *optimal* number of files.
+
+         Parameters
+         ----------
+         df : DataFrame
+         MB_per_cmp_file : float
+             Desired size of a compressed file on disk in MB
+         cmp_ratio : float
+             Ratio of file size after to before compression
+
+         Returns
+         -------
+         n_files : int
+             Number of files
+         """
+         _, _, data_MB = self.get_data_size(df)
+         MB_per_file = MB_per_cmp_file / cmp_ratio
+         n_files = math.ceil(data_MB / MB_per_file)
+
+         logger.info(
+             f"Dataframe is approximately {data_MB:.02f} MB in size, "
+             f"ideal to split into {n_files} file(s) at {MB_per_cmp_file:.1f} MB compressed on disk "
+             f"({MB_per_file:.1f} MB uncompressed in memory, {cmp_ratio} compression ratio)."
+         )
+         return n_files
+
+     @timed_info
+     def file_size_if_partition_by(self, df, key):
+         """Calculate sharded file sizes based on a partitionBy key."""
+         n_rows, n_cols, data_MB = self.get_data_size(df)
+         n_partitions = df.select(key).distinct().count()
+         avg_MB = round(data_MB / n_partitions, 2)
+
+         n_rows_largest_part = df.groupBy(key).count().orderBy("count", ascending=False).first()[1]
+         n_rows_smallest_part = df.groupBy(key).count().orderBy("count", ascending=True).first()[1]
+
+         largest_MB = round(data_MB / n_rows * n_rows_largest_part, 2)
+         smallest_MB = round(data_MB / n_rows * n_rows_smallest_part, 2)
+
+         report = (
+             f'Partitioning by "{key}" will yield: \n'
+             + f" - # of partitions: {n_partitions} \n"
+             + f" - avg partition size: {avg_MB} MB \n"
+             + f" - largest partition: {largest_MB} MB \n"
+             + f" - smallest partition: {smallest_MB} MB \n"
+         )
+
+         logger.info(report)
+
+         output = {
+             key: {
+                 "n_partitions": n_partitions,
+                 "avg_partition_MB": avg_MB,
+                 "max_partition_MB": largest_MB,
+                 "min_partition_MB": smallest_MB,
+             }
+         }
+
+         return output
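A minimal usage sketch of SparkPartition, assuming an active SparkSession and an existing DataFrame df; the "county" key and the output path are hypothetical:

    from dsgrid.utils.spark_partition import SparkPartition

    partition = SparkPartition()
    # With the defaults, each output file targets 128 MB compressed on disk,
    # i.e., roughly 128 / 0.18 ~= 711 MB of uncompressed in-memory data per file.
    n_files = partition.get_optimal_number_of_files(df)
    # Check partition-size skew before choosing a partitionBy key.
    sizes = partition.file_size_if_partition_by(df, "county")
    df.repartition(n_files).write.parquet("/tmp/output.parquet")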
dsgrid/utils/timing.py ADDED
@@ -0,0 +1,239 @@
+ """Utility functions for timing measurements."""
+
+ import functools
+ import logging
+ import time
+ from pathlib import Path
+
+ from dsgrid.utils.files import dump_line_delimited_json
+
+ logger = logging.getLogger(__name__)
+
+
+ def timed_info(func):
+     """Decorator to measure and log a function's execution time at INFO level."""
+
+     @functools.wraps(func)
+     def timed_(*args, **kwargs):
+         return _timed(func, logger.info, *args, **kwargs)
+
+     return timed_
+
+
+ def timed_debug(func):
+     """Decorator to measure and log a function's execution time at DEBUG level."""
+
+     @functools.wraps(func)
+     def timed_(*args, **kwargs):
+         return _timed(func, logger.debug, *args, **kwargs)
+
+     return timed_
+
+
+ def _timed(func, log_func, *args, **kwargs):
+     start = time.time()
+     result = func(*args, **kwargs)
+     total = time.time() - start
+     log_func("execution-time=%s func=%s", get_time_duration_string(total), func.__name__)
+     return result
+
+
+ def get_time_duration_string(seconds):
+     """Return a string with the time converted to reasonable units."""
+     if seconds >= 1:
+         val = "{:.3f} s".format(seconds)
+     elif seconds >= 0.001:
+         val = "{:.3f} ms".format(seconds * 1000)
+     elif seconds >= 0.000001:
+         val = "{:.3f} us".format(seconds * 1000000)
+     elif seconds == 0:
+         val = "0 s"
+     else:
+         val = "{:.3f} ns".format(seconds * 1000000000)
+
+     return val
+
+
+ class TimerStats:
+     """Tracks timing stats for one code block."""
+
+     def __init__(self, name):
+         self._name = name
+         self._count = 0
+         self._max = 0.0
+         self._min = None
+         self._avg = 0.0
+         self._total = 0.0
+
+     def get_stats(self):
+         """Get the current stats summary.
+
+         Returns
+         -------
+         dict
+
+         """
+         avg = 0 if self._count == 0 else self._total / self._count
+         return {
+             "min": self._min,
+             "max": self._max,
+             "total": self._total,
+             "avg": avg,
+             "count": self._count,
+         }
+
+     def log_stats(self):
+         """Log a summary of the stats."""
+         if self._count == 0:
+             logger.info("No stats have been recorded for %s.", self._name)
+             return
+
+         x = self.get_stats()
+         text = "total={:.3f}s avg={:.3f}ms max={:.3f}ms min={:.3f}ms count={}".format(
+             x["total"], x["avg"] * 1000, x["max"] * 1000, x["min"] * 1000, x["count"]
+         )
+         logger.info("TimerStats summary: %s: %s", self._name, text)
+
+     def update(self, duration):
+         """Update the stats with a new timing."""
+         self._count += 1
+         self._total += duration
+         if duration > self._max:
+             self._max = duration
+         if self._min is None or duration < self._min:
+             self._min = duration
+
+
+ class Timer:
+     """Times a code block."""
+
+     def __init__(self, timer_stats, name):
+         self._start = None
+         self._timer_stat = timer_stats.get_stat(name)
+
+     def __enter__(self):
+         if self._timer_stat is not None:
+             self._start = time.perf_counter()
+
+     def __exit__(self, exc, value, tb):
+         if self._timer_stat is not None:
+             self._timer_stat.update(time.perf_counter() - self._start)
+
+
+ def track_timing(collector):
+     """Decorator to track statistics on a function's execution time.
+
+     Parameters
+     ----------
+     collector : TimerStatsCollector
+
+     """
+
+     def wrap(func):
+         @functools.wraps(func)
+         def timed_(*args, **kwargs):
+             return _timed_func(collector, func, *args, **kwargs)
+
+         return timed_
+
+     return wrap
+
+
+ def _timed_func(timer_stats, func, *args, **kwargs):
+     with Timer(timer_stats, func.__qualname__):
+         return func(*args, **kwargs)
+
+
+ class TimerStatsCollector:
+     """Collects statistics for timed code segments."""
+
+     def __init__(self, is_enabled=False):
+         self._stats = {}
+         self._is_enabled = is_enabled
+
+     def clear(self):
+         """Clear all stats."""
+         self._stats.clear()
+
+     def disable(self):
+         """Disable timing."""
+         self._is_enabled = False
+
+     def enable(self):
+         """Enable timing."""
+         self._is_enabled = True
+
+     def get_stat(self, name):
+         """Return a TimerStats. Return None if timing is disabled.
+
+         Parameters
+         ----------
+         name : str
+
+         Returns
+         -------
+         TimerStats | None
+
+         """
+         if not self._is_enabled:
+             return None
+         if name not in self._stats:
+             self.register_stat(name)
+         return self._stats[name]
+
+     @property
+     def is_enabled(self) -> bool:
+         """Return True if timing is enabled."""
+         return self._is_enabled
+
+     def log_json_stats(self, filename: Path, clear=False):
+         """Log line-delimited JSON stats to filename.
+
+         Parameters
+         ----------
+         filename : Path
+         clear : bool
+             If True, clear all stats.
+         """
+         if self._is_enabled:
+             rows = []
+             for name, stat in self._stats.items():
+                 row = {"name": name}
+                 row.update(stat.get_stats())
+                 rows.append(row)
+             dump_line_delimited_json(rows, filename, mode="a")
+         if clear:
+             self._stats.clear()
+
+     def log_stats(self, clear=False):
+         """Log statistics for all tracked stats.
+
+         Parameters
+         ----------
+         clear : bool
+             If True, clear all stats.
+         """
+         if self._is_enabled:
+             for stat in self._stats.values():
+                 stat.log_stats()
+         if clear:
+             self._stats.clear()
+
+     def register_stat(self, name):
+         """Register tracking of a new stat.
+
+         Parameters
+         ----------
+         name : str
+
+         Returns
+         -------
+         TimerStats
+
+         """
+         if self._is_enabled:
+             assert name not in self._stats
+             stat = TimerStats(name)
+             self._stats[name] = stat
+             return stat
+
+
+ timer_stats_collector = TimerStatsCollector()
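A usage sketch of the timing helpers above; the function and block names are hypothetical. Note that the module-level timer_stats_collector starts disabled, so enable() must be called for stats to accumulate:

    from dsgrid.utils.timing import Timer, timed_info, timer_stats_collector, track_timing

    @track_timing(timer_stats_collector)
    def load_tables():  # hypothetical; stats are keyed by the function's __qualname__
        ...

    @timed_info
    def run_query():  # hypothetical; logs execution-time=... at INFO level on every call
        ...

    timer_stats_collector.enable()
    load_tables()
    with Timer(timer_stats_collector, "post-process"):  # time an ad hoc code block
        ...
    timer_stats_collector.log_stats(clear=True)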
dsgrid/utils/utilities.py ADDED
@@ -0,0 +1,221 @@
+ """
+ Helpful utility functions for dsgrid
+ """
+
+ import logging
+ import inspect
+ import json
+ import os
+ from enum import Enum
+ from typing import Iterable
+
+ from prettytable import PrettyTable
+
+ try:
+     from IPython.display import display, HTML
+     from IPython import get_ipython
+     from ipykernel.zmqshell import ZMQInteractiveShell
+
+     _IPYTHON_INSTALLED = True
+ except ImportError:
+     _IPYTHON_INSTALLED = False
+
+
+ from dsgrid.exceptions import DSGJSONError
+
+ logger = logging.getLogger(__name__)
+
+
+ def safe_json_load(fpath):
+     """Perform a JSON file load with better exception handling.
+
+     Parameters
+     ----------
+     fpath : str
+         Filepath to .json file.
+
+     Returns
+     -------
+     j : dict
+         Loaded JSON dictionary.
+
+     Examples
+     --------
+     >>> json_path = "./path_to_json.json"
+     >>> safe_json_load(json_path)
+     {key1: value1,
+      key2: value2}
+     """
+
+     if not isinstance(fpath, str):
+         msg = "Filepath must be str to load json: {}".format(fpath)
+         raise TypeError(msg)
+
+     if not fpath.endswith(".json"):
+         msg = "Filepath must end in .json to load json: {}".format(fpath)
+         raise DSGJSONError(msg)
+
+     if not os.path.isfile(fpath):
+         msg = "Could not find json file to load: {}".format(fpath)
+         raise DSGJSONError(msg)
+
+     try:
+         with open(fpath, "r") as f:
+             j = json.load(f)
+     except json.decoder.JSONDecodeError as e:
+         emsg = 'JSON Error:\n{}\nCannot read json file: "{}"'.format(e, fpath)
+         raise DSGJSONError(emsg) from e
+
+     return j
+
+
+ def get_class_properties(cls):
+     """Get all class properties.
+
+     Used to check against config keys.
+
+     Returns
+     -------
+     properties : list
+         List of class properties, each of which should represent a valid
+         config key/entry
+     """
+     properties = [
+         attr for attr, attr_obj in inspect.getmembers(cls) if isinstance(attr_obj, property)
+     ]
+
+     return properties
+
+
+ def check_uniqueness(iterable: Iterable, tag: str) -> set[str]:
+     """Raise ValueError if the iterable has duplicate entries.
+
+     Parameters
+     ----------
+     iterable : list | generator
+     tag : str
+         Tag to add to the exception string
+
+     Returns
+     -------
+     set[str]
+
+     """
+     values = set()
+     for item in iterable:
+         if item in values:
+             msg = f"duplicate {tag}: {item}"
+             raise ValueError(msg)
+         values.add(item)
+     return values
+
+
+ def convert_record_dicts_to_classes(iterable, cls, check_duplicates: None | list[str] = None):
+     """Convert an iterable of dicts to instances of a data class.
+
+     Parameters
+     ----------
+     iterable
+         Any iterable of dicts that must have an 'id' field.
+     cls : class
+         Instantiate a class from each dict by splatting the dict to the constructor.
+     check_duplicates : None | list[str]
+         If given as a list of column names, ensure that there are no duplicates among the rows.
+
+     Returns
+     -------
+     list
+     """
+     records = []
+     check_duplicates = check_duplicates or []
+     values = {x: set() for x in check_duplicates}
+     length = None
+     for row in iterable:
+         if None in row:
+             msg = f"row has a key that is None: {row=}"
+             raise ValueError(msg)
+         if length is None:
+             length = len(row)
+         elif len(row) != length:
+             msg = f"Rows have inconsistent length: first_row_length={length} {row=}"
+             raise ValueError(msg)
+         record = cls(**row)
+         for name in check_duplicates:
+             val = getattr(record, name)
+             if val in values[name]:
+                 msg = f"{val} is listed multiple times"
+                 raise ValueError(msg)
+             values[name].add(val)
+         records.append(record)
+
+     return records
+
+
+ def list_enum_values(enum: type[Enum]):
+     """Return a list of the enum's values."""
+     return [e.value for e in enum]
+
+
+ def in_jupyter_notebook():
+     """Return True if the current interpreter is running in a Jupyter notebook.
+
+     Returns
+     -------
+     bool
+
+     """
+     if not _IPYTHON_INSTALLED:
+         return False
+
+     return isinstance(get_ipython(), ZMQInteractiveShell)
+
+
+ def display_table(table: PrettyTable):
+     """Display a table in ASCII or HTML format as determined by the current interpreter.
+
+     Parameters
+     ----------
+     table : PrettyTable
+
+     """
+     if in_jupyter_notebook():
+         display(HTML(table.get_html_string()))
+     else:
+         print(table)
+
+
+ def make_unique_key(base_name: str, existing_keys: Iterable[str]) -> str:
+     """Generate a unique key by appending an index if the base name already exists.
+
+     Parameters
+     ----------
+     base_name : str
+         The base name to use as a key.
+     existing_keys : Iterable[str]
+         Collection of existing keys to check against.
+
+     Returns
+     -------
+     str
+         A unique key, either the base name or the base name with an appended index
+         (e.g., 'name_1', 'name_2').
+
+     Examples
+     --------
+     >>> make_unique_key("file", {"other", "another"})
+     'file'
+     >>> make_unique_key("file", {"file", "other"})
+     'file_1'
+     >>> make_unique_key("file", {"file", "file_1", "file_2"})
+     'file_3'
+     """
+     existing = set(existing_keys)
+     if base_name not in existing:
+         return base_name
+
+     index = 1
+     while True:
+         new_key = f"{base_name}_{index}"
+         if new_key not in existing:
+             return new_key
+         index += 1
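Brief sketches of the uniqueness helpers, following the behavior defined above (values are illustrative):

    from dsgrid.utils.utilities import check_uniqueness, make_unique_key

    check_uniqueness(["com", "res", "ind"], "sector")   # returns {"com", "res", "ind"}
    # check_uniqueness(["com", "com"], "sector")        # raises ValueError: duplicate sector: com

    make_unique_key("file", {"other"})                  # returns "file"
    make_unique_key("file", {"file", "file_1"})         # returns "file_2"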
dsgrid/utils/versioning.py ADDED
@@ -0,0 +1,36 @@
+ """Utility functions for versioning"""
+
+ from semver import VersionInfo
+
+
+ def handle_version_or_str(version):
+     """Return a VersionInfo, parsing it first if version is a str."""
+     if isinstance(version, str):
+         return make_version(version)
+     return version
+
+
+ def make_version(version):
+     """Convert the string version to a VersionInfo object.
+
+     Parameters
+     ----------
+     version : str
+
+     Returns
+     -------
+     VersionInfo
+
+     Raises
+     ------
+     ValueError
+         Raised if parsing fails.
+
+     """
+     try:
+         return VersionInfo.parse(version)
+     except Exception as exc:
+         msg = f"Failed to create VersionInfo: {exc}"
+         raise ValueError(msg) from exc
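A short usage sketch of these wrappers:

    from dsgrid.utils.versioning import handle_version_or_str, make_version

    make_version("1.2.3")                          # VersionInfo(major=1, minor=2, patch=3, ...)
    handle_version_or_str("1.2.3")                 # parses the str into a VersionInfo
    handle_version_or_str(make_version("1.2.3"))   # passes a VersionInfo through unchanged
    # make_version("not-a-version")                # raises ValueError: Failed to create VersionInfo: ...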