PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/shortcuts.py ADDED
@@ -0,0 +1,70 @@
1
+ """Shortcuts provide simple methods to generate instances of different PostBOUND objects, mostly for REPL contexts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from . import qal
6
+ from ._core import ColumnReference, TableReference
7
+
8
+
9
+ def tab(table: str) -> TableReference:
10
+ """Creates a table instance.
11
+
12
+ Parameters
13
+ ----------
14
+ table : str
15
+ The name and/or alias of the table. Supported formats include ``"table_name"`` and ``"table_name alias"``
16
+
17
+ Returns
18
+ -------
19
+ TableReference
20
+ The resulting table. This will never be a virtual table.
21
+ """
22
+ if " " in table:
23
+ full_name, alias = table.split(" ")
24
+ return TableReference(full_name, alias)
25
+ else:
26
+ return TableReference(table)
27
+
28
+
29
+ def col(column: str) -> ColumnReference:
30
+ """Creates a column instance.
31
+
32
+ Parameters
33
+ ----------
34
+ column : str
35
+ The name and/or table of the column. Supported formats include ``"column_name"`` and ``"table_name.column_name"``
36
+
37
+ Returns
38
+ -------
39
+ ColumnReference
40
+ The resulting column. If a table name is included before the ``.``, it will be parsed according to the rules of
41
+ `tab()`.
42
+ """
43
+ if "." in column:
44
+ table_name, column_name = column.split(".")
45
+ return ColumnReference(column_name, tab(table_name))
46
+ else:
47
+ return ColumnReference(column)
48
+
49
+
50
+ def q(query: str) -> qal.SqlQuery:
51
+ """Parses the given SQL query.
52
+
53
+ This is really just a shortcut for importing and calling the parser module.
54
+
55
+ Parameters
56
+ ----------
57
+ query : str
58
+ The SQL query to parse
59
+
60
+ Returns
61
+ -------
62
+ qal.SqlQuery
63
+ A QAL query object corresponding to the given input query. Errors can be produced according to the documentation of
64
+ `qal.parse_query`.
65
+
66
+ See Also
67
+ --------
68
+ qal.parse_query
69
+ """
70
+ return qal.parse_query(query)
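Taken together, these shortcuts make interactive exploration terse. A quick REPL-style sketch (the table, column, and query strings are invented for illustration):

    from postbound.shortcuts import tab, col, q

    movies = tab("movie_companies mc")   # TableReference("movie_companies", "mc")
    company = col("mc.company_id")       # ColumnReference("company_id", tab("mc"))
    query = q("SELECT mc.note FROM movie_companies mc WHERE mc.company_id = 42")

Note that col() passes the part before the dot through tab(), so it is interpreted as a table name rather than as an alias of some previously defined table.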
postbound/util/__init__.py ADDED
@@ -0,0 +1,46 @@
1
+ """Contains utilities that are not specific to PostBOUND's domain of databases and query optimization."""
2
+
3
+ from . import collections, dicts, proc, stats, system, typing
4
+ from . import networkx as nx
5
+ from ._errors import InvariantViolationError, LogicError, StateError
6
+ from .collections import enlist, flatten, powerset, set_union, simplify
7
+ from .dicts import argmin, frozendict, hash_dict
8
+ from .jsonize import jsondict, to_json, to_json_dump
9
+ from .logging import Logger, make_logger, timestamp
10
+ from .misc import DependencyGraph, Version, camel_case2snake_case
11
+ from .proc import run_cmd
12
+ from .stats import jaccard
13
+ from .system import open_files
14
+
15
+ __all__ = [
16
+ "flatten",
17
+ "enlist",
18
+ "simplify",
19
+ "set_union",
20
+ "powerset",
21
+ "collections",
22
+ "hash_dict",
23
+ "argmin",
24
+ "frozendict",
25
+ "dicts",
26
+ "StateError",
27
+ "LogicError",
28
+ "InvariantViolationError",
29
+ "jsondict",
30
+ "to_json",
31
+ "to_json_dump",
32
+ "timestamp",
33
+ "make_logger",
34
+ "Logger",
35
+ "camel_case2snake_case",
36
+ "Version",
37
+ "DependencyGraph",
38
+ "nx",
39
+ "run_cmd",
40
+ "proc",
41
+ "jaccard",
42
+ "stats",
43
+ "open_files",
44
+ "system",
45
+ "typing",
46
+ ]
postbound/util/_errors.py ADDED
@@ -0,0 +1,33 @@
1
+ """Contains various general errors that extend Python's base errors."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
+ class LogicError(RuntimeError):
7
+ """Generic error to indicate that any kind of algorithmic problem occurred.
8
+
9
+ This error is generally used when some assumption within PostBOUND is violated, but it's (probably) not the user's fault.
10
+ As a rule of thumb, if the user supplies faulty input, a `ValueError` should be raised instead.
11
+ Therefore, encountering a `LogicError` indicates a bug in PostBOUND itself.
12
+ """
13
+
14
+ def __init__(self, *args, **kwargs) -> None:
15
+ super().__init__(
16
+ "Internal PostBOUND error found. Please file a bug report on Github (https://github.com/rbergm/PostBOUND): ",
17
+ *args,
18
+ **kwargs,
19
+ )
20
+
21
+
22
+ class StateError(RuntimeError):
23
+ """Indicates that an object is not in the right state to perform an operation."""
24
+
25
+ def __init__(self, *args, **kwargs) -> None:
26
+ super().__init__(*args, **kwargs)
27
+
28
+
29
+ class InvariantViolationError(LogicError):
30
+ """Indicates that some contract of a method was violated. The arguments should provide further details."""
31
+
32
+ def __init__(self, *args, **kwargs) -> None:
33
+ super().__init__(*args, **kwargs)
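As a minimal sketch of how these error types are meant to be used downstream (the class and its checks are hypothetical, not part of the package):

    from postbound.util import InvariantViolationError, StateError

    class CardinalityCache:
        """Hypothetical helper illustrating the error conventions above."""

        def __init__(self) -> None:
            self._estimates: dict[str, int] | None = None

        def load(self, estimates: dict[str, int]) -> None:
            if any(card < 0 for card in estimates.values()):
                raise ValueError("Cardinalities must be non-negative")  # faulty user input
            self._estimates = dict(estimates)

        def best_guess(self) -> tuple[str, int]:
            if self._estimates is None:
                raise StateError("load() must be called before best_guess()")
            label = min(self._estimates, key=self._estimates.get)
            if self._estimates[label] < 0:
                # Cannot happen if load() did its job -- this would be a bug in this class
                raise InvariantViolationError("Negative cardinality slipped through")
            return label, self._estimates[label]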
postbound/util/collections.py ADDED
@@ -0,0 +1,490 @@
1
+ """Provides utilities to work with arbitrary collections like lists, sets and tuples."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import itertools
6
+ import typing
7
+ from collections.abc import (
8
+ Callable,
9
+ Collection,
10
+ Container,
11
+ Generator,
12
+ Iterable,
13
+ Iterator,
14
+ Sequence,
15
+ Sized,
16
+ )
17
+ from typing import Any, Optional, overload
18
+
19
+ from .._base import T
20
+ from .dicts import HashableDict
21
+
22
+ ContainerType = typing.TypeVar("ContainerType", list, tuple, set, frozenset)
23
+ """Specifies which types are considered containers.
24
+
25
+ For some methods this is necessary to determine whether any work still has to be done.
26
+ """
27
+
28
+
29
+ def flatten(deep_list: Iterable[Iterable[T] | T]) -> list[T]:
30
+ """Transforms a nested list into a flat list: ``[[1, 2], [3]]`` is turned into ``[1, 2, 3]``
31
+
32
+ Parameters
33
+ ----------
34
+ deep_list : Iterable[Iterable[T] | T]
35
+ The list to flatten
36
+
37
+ Returns
38
+ -------
39
+ list[T]
40
+ The flattened list: all elements of iterables from the `deep_list` are now contained directly in the resulting list.
41
+ """
42
+ flattened: list[T] = []
43
+ for nested in deep_list:
44
+ if isinstance(nested, Iterable) and not isinstance(nested, str):
45
+ flattened.extend(nested)
46
+ else:
47
+ flattened.append(nested)
48
+ return flattened
49
+
50
+
51
+ @overload
52
+ def enlist(obj: list[T]) -> list[T]: ...
53
+
54
+
55
+ @overload
56
+ def enlist(
57
+ obj: tuple[T, ...], *, enlist_tuples: bool = False
58
+ ) -> list[tuple[T, ...]]: ...
59
+
60
+
61
+ @overload
62
+ def enlist(obj: tuple[T, ...]) -> tuple[T, ...]: ...
63
+
64
+
65
+ @overload
66
+ def enlist(obj: set[T]) -> set[T]: ...
67
+
68
+
69
+ @overload
70
+ def enlist(obj: frozenset[T]) -> frozenset[T]: ...
71
+
72
+
73
+ @overload
74
+ def enlist(obj: T) -> list[T]: ...
75
+
76
+
77
+ def enlist(obj: T | Iterable[T], *, enlist_tuples: bool = False) -> Iterable[T]:
78
+ """Transforms any object into a singular list of that object, if it is not a container already.
79
+
80
+ Specifically, the following types are treated as container-like and will not be transformed: lists, tuples, sets
81
+ and frozensets. The treatment of tuples can be configured via parameters. All other arguments will be wrapped in a list.
82
+
83
+ For example, ``"abc"`` is turned into ``["abc"]``, whereas ``["abc"]`` is returned unmodified.
84
+
85
+ Parameters
86
+ ----------
87
+ obj : T | Iterable[T]
88
+ The object or list to wrap
89
+ enlist_tuples : bool, optional
90
+ Whether a tuple `obj` should be enlisted. This is ``False`` by default
91
+
92
+ Returns
93
+ -------
94
+ Iterable[T]
95
+ The object, wrapped into a list if necessary
96
+ """
97
+ if isinstance(obj, str):
98
+ return [obj]
99
+ if isinstance(obj, tuple) and enlist_tuples:
100
+ return [obj]
101
+ list_types = [tuple, list, set, frozenset]
102
+ if any(isinstance(obj, target_type) for target_type in list_types):
103
+ return obj
104
+ return [obj]
105
+
106
+
107
+ def get_any(elems: Iterable[T]) -> T:
108
+ """Provides any element from an iterable. There is no guarantee which one will be returned.
109
+
110
+ This method can potentially iterate over the entire iterable. The behaviour for empty iterables is undefined.
111
+
112
+ Parameters
113
+ ----------
114
+ elems : Iterable[T]
115
+ The items from which to choose.
116
+
117
+ Returns
118
+ -------
119
+ T
120
+ Any of the elements from the iterable. If the iterable is empty, the behaviour is undefined.
121
+ """
122
+ return next(iter(elems))
123
+
124
+
125
+ def simplify(obj: Iterable[T]) -> T:
126
+ """Unwraps containers containing just a single element.
127
+
128
+ This can be thought of as the inverse operation to `enlist`. If the object contains multiple elements, nothing happens.
129
+
130
+ Parameters
131
+ ----------
132
+ obj : Iterable[T]
133
+ The object to simplify
134
+
135
+ Returns
136
+ -------
137
+ T
138
+ For a singular list, the object that was contained in that list. Otherwise `obj` is returned unmodified. Since this
139
+ method is mainly intended for lists which are known to contain exactly one element, we use *T* as a return type to
140
+ assist the type checker.
141
+
142
+ Examples
143
+ --------
144
+ The singular list ``[1]`` is simplified to ``1``. On the other hand, ``[1,2]`` is returned unmodified.
145
+ """
146
+ if "__len__" not in dir(obj) or "__iter__" not in dir(obj):
147
+ return obj
148
+
149
+ if len(obj) == 1:
150
+ return list(obj)[0]
151
+ return obj
152
+
153
+
154
+ def foreach(lst: Iterable[T], action: Callable[[T], None]) -> None:
155
+ """Shortcut to apply a specific action to each element in an iterable.
156
+
157
+ Parameters
158
+ ----------
159
+ lst : Iterable[T]
160
+ The elements.
161
+ action : Callable[[T], None]
162
+ The side-effect that should be applied to all elements.
163
+ """
164
+ for elem in lst:
165
+ action(elem)
166
+
167
+
168
+ def powerset(lst: Collection[T]) -> Iterable[tuple[T, ...]]:
169
+ """Calculates the powerset of the provided iterable.
170
+
171
+ The powerset of a set *S* is defined as the set that contains all subsets of *S*. This includes the empty set, as well
172
+ as the entire set *S*.
173
+
174
+ Parameters
175
+ ----------
176
+ lst : Collection[T]
177
+ The "set" *S*
178
+
179
+ Returns
180
+ -------
181
+ Iterable[tuple[T, ...]]
182
+ The powerset of *S*. Each tuple corresponds to a specific subset. The order of the elements within the tuple is not
183
+ significant.
184
+ """
185
+ return itertools.chain.from_iterable(
186
+ itertools.combinations(lst, size) for size in range(len(lst) + 1)
187
+ )
188
+
189
+
190
+ def sliding_window(
191
+ lst: Sequence[T], size: int, step: int = 1
192
+ ) -> Generator[tuple[Sequence[T], Sequence[T], Sequence[T]], None, None]:
193
+ """Iterates over the given sequence using a sliding window.
194
+
195
+ The window will contain exactly `size` many entries, starting at the beginning of the sequence. After yielding a
196
+ window, the next window will be shifted `step` many elements.
197
+
198
+ Parameters
199
+ ----------
200
+ lst : Sequence[T]
201
+ The sequence to iterate over
202
+ size : int
203
+ The number of elements in the sliding window
204
+ step : int, optional
205
+ The number of elements to shift after each window, defaults to 1.
206
+
207
+ Yields
208
+ ------
209
+ Generator[tuple[Sequence[T], Sequence[T], Sequence[T]]]
210
+ The sliding window subsets. The tuples are structured as follows: *(prefix, window, suffix)* where *prefix* are all
211
+ elements of the sequence before the current window, *window* contains exactly those elements that are part of the
212
+ current window and *suffix* contains all elements after the current window.
213
+ """
214
+ for i in range(0, len(lst) - size + 1, step):
215
+ prefix = lst[:i]
216
+ window = lst[i : i + size]
217
+ suffix = lst[i + size :]
218
+ yield prefix, window, suffix
219
+
220
+
221
+ def pairs(lst: Iterable[T]) -> Generator[tuple[T, T], None, None]:
222
+ """Provides all pairs of elements of the given iterable, disregarding order and identical pairs.
223
+
224
+ This means that the resulting iterable will not contain entries *(a, a)* unless *a* itself is present multiple
225
+ times in the input. Likewise, tuples *(a, b)* and *(b, a)* are treated as equal and only one of them will be
226
+ returned (Again, unless *a* or *b* are present multiple times in the input. In that case, their order is
227
+ unspecified.)
228
+
229
+ Parameters
230
+ ----------
231
+ lst : Iterable[T]
232
+ The iterable that contains the pairs. It must be possible to iterate over it multiple times (twice, to be exact).
233
+
234
+ Yields
235
+ ------
236
+ Generator[tuple[T, T], None, None]
237
+ The element pairs.
238
+ """
239
+ for a_idx, a in enumerate(lst):
240
+ for b_idx, b in enumerate(lst):
241
+ if b_idx <= a_idx:
242
+ continue
243
+ yield a, b
244
+
245
+
246
+ def set_union(sets: Iterable[set[T] | frozenset[T]]) -> set[T]:
247
+ """Computes the union of many sets.
248
+
249
+ Parameters
250
+ ----------
251
+ sets : Iterable[set[T] | frozenset[T]]
252
+ The sets to combine. Frozensets are "expanded" to regular sets.
253
+
254
+ Returns
255
+ -------
256
+ set[T]
257
+ Large union of all provided sets.
258
+ """
259
+ union_set: set[T] = set()
260
+ for s in sets:
261
+ union_set |= s
262
+ return union_set
263
+
264
+
265
+ def make_hashable(obj: Any) -> Any:
266
+ """Attempts to generate an equivalent, hashable representation for a container.
267
+
268
+ This function operates on the standard container types list, tuple, set, dictionary and frozenset and performs the
269
+ following conversion:
270
+
271
+ - list becomes tuple, all elements of the list are recursively made hashable
272
+ - tuples are left as-is, but all elements of the tuple are recursively made hashable
273
+ - sets become frozensets. The elements are left as they are, because they must already be hashable
274
+ - dictionaries become instances of `dicts.HashableDict`. The values are recursively made hashable, keys are left the
275
+ way they are because they must already be hashable
276
+ - frozensets are left as-is
277
+
278
+ All other types, including user-defined types are returned as-is.
279
+
280
+ Parameters
281
+ ----------
282
+ obj : Any
283
+ The object to hash
284
+
285
+ Returns
286
+ -------
287
+ Any
288
+ The hashable counterpart of the object
289
+ """
290
+ if isinstance(obj, set):
291
+ return frozenset(obj)
292
+ elif isinstance(obj, list) or isinstance(obj, tuple):
293
+ return tuple(make_hashable(elem) for elem in obj)
294
+ elif isinstance(obj, dict):
295
+ return HashableDict({k: make_hashable(v) for k, v in obj.items()})
296
+ else:
297
+ return obj
298
+
299
+
300
+ class Queue(Iterable[T], Sized, Container[T]):
301
+ """A queue is a wrapper around an underlying list of elements which provides FIFO semantics for access.
302
+
303
+ Parameters
304
+ ----------
305
+ data : Iterable[T] | None, optional
306
+ Initial contents of the queue. By default the queue is empty at the beginning.
307
+
308
+ """
309
+
310
+ def __init__(self, data: Iterable[T] | None = None) -> None:
311
+ self.data = list(data) if data else []
312
+
313
+ def enqueue(self, value: T) -> None:
314
+ """Adds a new item to the end of the queue.
315
+
316
+ Parameters
317
+ ----------
318
+ value : T
319
+ The item to add
320
+ """
321
+ self.data.append(value)
322
+
323
+ def push(self, value: T) -> None:
324
+ """Adds a new item to the end of the queue.
325
+
326
+ This is an alias for `enqueue`.
327
+
328
+ Parameters
329
+ ----------
330
+ value : T
331
+ The item to add
332
+ """
333
+ self.enqueue(value)
334
+
335
+ def append(self, value: T) -> None:
336
+ """Adds a new item to end of the queue.
337
+
338
+ This method is an alias for `enqueue` to enable easier interchangeability with normal lists.
339
+
340
+ Parameters
341
+ ----------
342
+ value : T
343
+ The item to add
344
+ """
345
+ self.enqueue(value)
346
+
347
+ def extend(self, values: Iterable[T]) -> None:
348
+ """Adds a number of values to the end of the queue.
349
+
350
+ Parameters
351
+ ----------
352
+ values : Iterable[T]
353
+ The elements to add. The order in the queue matches the order in the iterable.
354
+ """
355
+ self.data.extend(values)
356
+
357
+ def head(self) -> Optional[T]:
358
+ """Provides the current first element of the queue without removing.
359
+
360
+ Returns
361
+ -------
362
+ Optional[T]
363
+ The first element if it exists, or ``None`` if the queue is empty.
364
+ """
365
+ return self.data[0] if self.data else None
366
+
367
+ def peak(self) -> Optional[T]:
368
+ """Provides the current first element of the queue without removing.
369
+
370
+ This is an alias for `head`.
371
+
372
+ Returns
373
+ -------
374
+ Optional[T]
375
+ The first element if it exists, or ``None`` if the queue is empty.
376
+ """
377
+ return self.head()
378
+
379
+ def pop(self) -> Optional[T]:
380
+ """Provides the current first element of the queue and removes it.
381
+
382
+ Returns
383
+ -------
384
+ Optional[T]
385
+ The first element if it exists, or ``None`` if the queue is empty.
386
+ """
387
+ item = self.head()
388
+ if self.data:
389
+ self.data.pop(0)
390
+ return item
391
+
392
+ def __len__(self) -> int:
393
+ return len(self.data)
394
+
395
+ def __contains__(self, __x: object) -> bool:
396
+ return __x in self.data
397
+
398
+ def __iter__(self) -> Iterator[T]:
399
+ return self.data.__iter__()
400
+
401
+ def __repr__(self) -> str:
402
+ return f"Queue({self.data})"
403
+
404
+ def __str__(self) -> str:
405
+ return str(self.data)
406
+
407
+
408
+ class SizedQueue(Collection[T]):
409
+ """A sized queue extends on the behaviour of a normal queue by restricting the number of items in the queue.
410
+
411
+ A sized queue has weak FIFO semantics: items can only be appended at the end, but the contents of the entire queue
412
+ can be accessed at any time.
413
+
414
+ If upon enqueuing a new item the queue is already at maximum capacity, the current head of the queue will be
415
+ dropped.
416
+
417
+ Parameters
418
+ ----------
419
+ capacity : int
420
+ The maximum number of items the queue can contain at the same time.
421
+ data : Optional[Iterable[T]], optional
422
+ Initial contents of the queue. By default the queue is empty at the beginning.
423
+
424
+ Notes
425
+ -----
426
+ Although `Queue` and `SizedQueue` provide similar FIFO semantics, there is no subclass relationship between the two. This
427
+ is by design, since the contract of a queue is very different from the contract of a sized queue.
428
+
429
+ """
430
+
431
+ def __init__(self, capacity: int, data: Optional[Iterable[T]] = None) -> None:
432
+ self.data = list(data) if data else []
433
+ self.capacity = capacity
434
+
435
+ def append(self, value: T) -> None:
436
+ """Adds a new item to the end of the queue, popping any excess items.
437
+
438
+ Parameters
439
+ ----------
440
+ value : T
441
+ The value to add
442
+ """
443
+ if len(self.data) >= self.capacity:
444
+ self.data.pop(0)
445
+ self.data.append(value)
446
+
447
+ def extend(self, values: typing.Iterable[T]) -> None:
448
+ """Adds all the items to the end of the queue, popping any excess items.
449
+
450
+ Parameters
451
+ ----------
452
+ values : typing.Iterable[T]
453
+ The values to add
454
+ """
455
+ self.data = (self.data + list(values))[: self.capacity]
456
+
457
+ def head(self) -> Optional[T]:
458
+ """Provides the current first item of the queue without removing it.
459
+
460
+ Returns
461
+ -------
462
+ Optional[T]
463
+ The first item in the queue, or ``None`` if the queue is empty
464
+ """
465
+ return self.data[0] if self.data else None
466
+
467
+ def pop(self) -> Optional[T]:
468
+ """Provides the current first item of the queue and removes it.
469
+
470
+ Returns
471
+ -------
472
+ Optional[T]
473
+ The first item in the queue, or ``None`` if the queue is empty
474
+ """
475
+ return self.data.pop(0) if self.data else None
476
+
477
+ def __contains__(self, other: T) -> bool:
478
+ return other in self.data
479
+
480
+ def __iter__(self) -> typing.Iterator[T]:
481
+ return self.data.__iter__()
482
+
483
+ def __len__(self) -> int:
484
+ return len(self.data)
485
+
486
+ def __repr__(self) -> str:
487
+ return f"SizedQueue(capacity={self.capacity}, data={self.data})"
488
+
489
+ def __str__(self) -> str:
490
+ return str(self.data)
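A few illustrative calls against the helpers above; the expected results (shown as comments) follow directly from the definitions:

    from postbound.util.collections import (
        Queue, SizedQueue, enlist, flatten, pairs, powerset, simplify, sliding_window,
    )

    flatten([[1, 2], [3], 4])   # [1, 2, 3, 4] - non-iterable elements are appended as-is
    enlist("abc")               # ["abc"] - strings are wrapped, not iterated
    enlist((1, 2))              # (1, 2) - tuples pass through unless enlist_tuples=True
    simplify([42])              # 42
    list(powerset([1, 2]))      # [(), (1,), (2,), (1, 2)]
    list(pairs("abc"))          # [("a", "b"), ("a", "c"), ("b", "c")]
    list(sliding_window([1, 2, 3, 4], 2))
    # [([], [1, 2], [3, 4]), ([1], [2, 3], [4]), ([1, 2], [3, 4], [])]

    fifo = Queue([1, 2])
    fifo.enqueue(3)
    fifo.pop()                  # 1 - items leave the queue in insertion order

    recent = SizedQueue(2, [1, 2])
    recent.append(3)            # at capacity: the old head (1) is dropped
    list(recent)                # [2, 3]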
postbound/util/dataframe.py ADDED
@@ -0,0 +1,71 @@
1
+ """Utilities to work with Pandas data frames"""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Collection, Iterable
6
+ from typing import Any, Optional
7
+
8
+ import pandas as pd
9
+
10
+
11
+ def _df_from_dict(
12
+ data: dict[Any, Collection[Any]],
13
+ key_name: Optional[str] = None,
14
+ column_names: Optional[Iterable[str]] = None,
15
+ ) -> pd.DataFrame:
16
+ data_template = next(iter(data.values()))
17
+ if column_names is None:
18
+ column_name_map = {i: str(i) for i in range(len(data_template))}
19
+ else:
20
+ column_name_map = {idx: col for idx, col in enumerate(column_names)}
21
+
22
+ df_container: dict[str, list[Any]] = {col: [] for col in column_name_map.values()}
23
+ for row in data.values():
24
+ for col_idx, col in enumerate(row):
25
+ col_name = column_name_map[col_idx]
26
+ df_container[col_name].append(col)
27
+
28
+ key_name = "key" if key_name is None else key_name
29
+ df_container[key_name] = list(data.keys())
30
+
31
+ return pd.DataFrame(df_container)
32
+
33
+
34
+ def _df_from_list(data: Collection[dict[Any, Any]]) -> pd.DataFrame:
35
+ data_template = next(iter(data))
36
+ df_container: dict[str, list[Any]] = {col: [] for col in data_template.keys()}
37
+ for row in data:
38
+ for key in df_container.keys():
39
+ df_container[key].append(row[key])
40
+ return pd.DataFrame(df_container)
41
+
42
+
43
+ def as_df(
44
+ data: dict[Any, Collection[Any]] | Collection[dict[Any, Any]],
45
+ *,
46
+ key_name: Optional[str] = None,
47
+ column_names: Optional[Iterable[str]] = None,
48
+ ) -> pd.DataFrame:
49
+ """Generates a new Pandas `DataFrame`.
50
+
51
+ The contents of the dataframe can be supplied in one of two forms: a collection of dictionaries will be transformed
52
+ into a dataframe such that each dictionary corresponds to one row of the dataframe. All dictionaries have to
53
+ consist of exactly the same key-value pairs. Each key becomes a column in the dataframe. The precise columns are
54
+ inferred from the first dictionary in the collection. In this case, column values are derived directly from the
55
+ keys.
56
+
57
+ The other form consists of one large dictionary of keys mapping to several columns. The resulting dataframe will
58
+ have one column that corresponds to the key values and additional columns that correspond to the entries in the
59
+ collection which was mapped-to by the key. All collections have to consist of exactly the same number of elements.
60
+ The precise number is inferred based on the first key-value pair. To name the different columns of the dataframe,
61
+ the `key_name` and `column_names` can be used. If no key name is given, it defaults to `key`. If no column names
62
+ are given, they default to numerical indices that correspond to the position in the mapped collection.
63
+ """
64
+ if not data:
65
+ return pd.DataFrame()
66
+ if isinstance(data, dict):
67
+ return _df_from_dict(data, key_name, column_names)
68
+ elif isinstance(data, Collection):
69
+ return _df_from_list(data)
70
+ else:
71
+ raise TypeError("Unexpected data type: " + str(data))
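To illustrate the two accepted input shapes for as_df (the benchmark labels and numbers are invented):

    from postbound.util.dataframe import as_df

    # Shape 1: a collection of row dictionaries - the keys become the columns
    as_df([
        {"query": "q1", "runtime": 1.2},
        {"query": "q2", "runtime": 0.8},
    ])

    # Shape 2: one mapping from keys to per-row collections - the key gets its own column
    as_df(
        {"q1": (1.2, "hash join"), "q2": (0.8, "merge join")},
        key_name="query",
        column_names=["runtime", "operator"],
    )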