PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,401 @@
1
+ """Provides graph-centric algorithms based on NetworkX [nx]_.
2
+
3
+ References
4
+ ----------
5
+
6
+ .. [nx] Aric A. Hagberg, Daniel A. Schult and Pieter J. Swart, "Exploring network structure, dynamics, and function using
7
+ NetworkX", in Proceedings of the 7th Python in Science Conference (SciPy2008), Gaël Varoquaux, Travis Vaught, and
8
+ Jarrod Millman (Eds), (Pasadena, CA USA), pp. 11-15, Aug 2008
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import dataclasses
14
+ import random
15
+ import typing
16
+ from collections.abc import (
17
+ Callable,
18
+ Collection,
19
+ Generator,
20
+ Iterable,
21
+ Iterator,
22
+ Sequence,
23
+ )
24
+ from typing import Optional
25
+
26
+ import networkx as nx
27
+
28
+ from .collections import Queue
29
+
30
+ NodeType = typing.TypeVar("NodeType")
31
+ """Generic type to model the specific nodes contained in a NetworkX graph."""
32
+
33
+
34
def nx_sinks(graph: nx.DiGraph) -> Collection[NodeType]:
    """Collects the sinks of a directed graph, i.e. all nodes without any outgoing edge.

    Parameters
    ----------
    graph : nx.DiGraph
        The directed graph to inspect

    Returns
    -------
    Collection[NodeType]
        The sink nodes, in the iteration order of ``graph.nodes``. The collection is empty if no node has an out-degree
        of zero.
    """
    # look the degree accessor up once instead of once per node
    out_degree = graph.out_degree
    return [candidate for candidate in graph.nodes if out_degree(candidate) == 0]
50
+
51
+
52
def nx_sources(graph: nx.DiGraph) -> Collection[NodeType]:
    """Collects the sources of a directed graph, i.e. all nodes without any incoming edge.

    Parameters
    ----------
    graph : nx.DiGraph
        The directed graph to inspect

    Returns
    -------
    Collection[NodeType]
        The source nodes, in the iteration order of ``graph.nodes``. The collection is empty if no node has an in-degree
        of zero.
    """
    # look the degree accessor up once instead of once per node
    in_degree = graph.in_degree
    return [candidate for candidate in graph.nodes if in_degree(candidate) == 0]
68
+
69
+
70
def nx_filter_nodes(
    graph: nx.Graph, predicate: Callable[[NodeType, dict], bool]
) -> Collection[tuple[NodeType, dict]]:
    """Selects all nodes of a graph that satisfy a predicate.

    Parameters
    ----------
    graph : nx.Graph
        The graph whose nodes should be filtered
    predicate : Callable[[NodeType, dict], bool]
        The filter to apply. It is called with each node and the data entry that ``graph.nodes.data()`` provides for
        that node. Nodes for which it returns a truthy value are retained.

    Returns
    -------
    Collection[tuple[NodeType, dict]]
        Pairs of node and node data for all matching nodes, in the iteration order of ``graph.nodes.data()``.
    """
    annotated_nodes = graph.nodes.data()
    return [
        (node, attributes)
        for node, attributes in annotated_nodes
        if predicate(node, attributes)
    ]
74
+
75
+
76
def nx_merge_nodes(
    graph: nx.Graph, nodes: Iterable[NodeType], *, target_node: NodeType
) -> nx.Graph:
    """Placeholder without an implementation.

    The function currently ignores all of its arguments, does not touch the graph and returns ``None`` (despite the
    annotated return type).

    NOTE(review): judging by the signature, this is presumably meant to merge `nodes` into `target_node` - the intended
    semantics need to be confirmed before implementing it.

    Parameters
    ----------
    graph : nx.Graph
        Currently unused
    nodes : Iterable[NodeType]
        Currently unused
    target_node : NodeType
        Currently unused
    """
    return None
80
+
81
+
82
def nx_random_walk(
    graph: nx.Graph, *, starting_node: Optional[NodeType] = None
) -> Generator[NodeType, None, None]:
    """A modified random walk implementation for NetworkX graphs.

    A random walk starts at any of the nodes of the graph. At each iteration, a neighboring node is selected and moved to.
    Afterwards, the iteration continues with that node.

    Our implementation uses the following modifications: after each stop, the walk may jump to a node that is connected to
    one of the visited nodes. This node does not necessarily have to be connected to the current node. Secondly, if the
    graph contains multiple connected components, the walk will first explore one component before jumping to the next
    one.

    The walk finishes when all nodes have been explored. For a graph without any nodes (and no explicit starting node),
    the walk is empty.

    Parameters
    ----------
    graph : nx.Graph
        The graph to walk over
    starting_node : Optional[NodeType], optional
        The node where the walk starts. If unspecified, a random node is selected.

    Yields
    ------
    Generator[NodeType, None, None]
        The nodes in the order in which they have been moved to. Each node is produced exactly once.
    """
    # TODO: could be refactored to use the GraphWalk class instead
    total_n_nodes = len(graph.nodes)
    if total_n_nodes == 0 and starting_node is None:
        # There is nothing to walk over. Without this guard, random.choice() fails with an IndexError on the empty
        # node list.
        return

    # shell nodes are all unvisited nodes that are adjacent to at least one visited node
    shell_nodes = set()
    visited_nodes = set()

    current_node = (
        random.choice(list(graph.nodes)) if starting_node is None else starting_node
    )
    visited_nodes.add(current_node)
    yield current_node

    while len(visited_nodes) < total_n_nodes:
        shell_nodes |= set(
            n for n in graph.adj[current_node].keys() if n not in visited_nodes
        )
        if not shell_nodes:
            # we have multiple connected components and need to jump into the other component
            current_node = random.choice(
                [n for n in graph.nodes if n not in visited_nodes]
            )
            visited_nodes.add(current_node)
            yield current_node
            continue

        current_node = random.choice(list(shell_nodes))
        shell_nodes.remove(current_node)
        visited_nodes.add(current_node)
        yield current_node
138
+
139
+
140
def nx_bfs_tree(
    graph: nx.Graph,
    start_node: NodeType,
    condition: Callable[[NodeType, dict], bool],
    *,
    node_order: Callable[[NodeType, dict], int] | None = None,
) -> Generator[tuple[NodeType, dict], None, None]:
    """Traverses a specific graph in breadth-first manner, yielding its nodes along the way.

    The traversal starts at a specific start node. During the traversal all nodes that match a condition are provided. If no
    more nodes are found or the condition cannot be satisfied for any more nodes, traversal terminates.

    Notice that the neighbors of a node are only scheduled for exploration if the node itself satisfies the condition: a
    node that fails the check is neither yielded nor expanded. The children of the start node are always scheduled.

    NOTE(review): nodes are only marked as visited once they are taken from the queue. Whether a node that is adjacent to
    several expanded nodes can be scheduled (and yielded) more than once depends on the semantics of `Queue`, which is
    defined outside of this module.

    Parameters
    ----------
    graph : nx.Graph
        The graph to explore
    start_node : NodeType
        The node where the exploration starts. This node will never be yielded.
    condition : Callable[[NodeType, dict], bool]
        A condition that is satisfied by all nodes that should be yielded. It receives the node and the data of the edge
        that was used to reach it.
    node_order : Callable[[NodeType, dict], int] | None, optional
        The sequence in which child nodes should be explored. This function receives the child node as well as the edge from
        its parent as arguments and produces a numerical position value as output (lower values indicate earlier yielding).
        If unspecified, the children are explored in the iteration order of the graph's adjacency data.

    Yields
    ------
    Generator[tuple[NodeType, dict], None, None]
        The node along with their edge data from the parent.

    See Also
    --------

    .. NetworkX documentation on usage and definition of edge data:
       https://networkx.org/documentation/stable/reference/introduction.html#nodes-and-edges
    """

    def _in_exploration_order(
        children: list[tuple[NodeType, dict]],
    ) -> list[tuple[NodeType, dict]]:
        # sorted() produces a new list, so its result has to be used - otherwise the requested order is silently lost
        if node_order is None:
            return children
        return sorted(children, key=lambda child: node_order(child[0], child[1]))

    shell_nodes = Queue(
        _in_exploration_order(
            [(node, edge) for node, edge in graph.adj[start_node].items()]
        )
    )
    visited_nodes = {start_node}
    while shell_nodes:
        current_node, current_edge = shell_nodes.pop()
        visited_nodes.add(current_node)
        if not condition(current_node, current_edge):
            # failing nodes are dropped and their neighbors are not scheduled through them
            continue

        neighbor_nodes = [
            (node, edge)
            for node, edge in graph.adj[current_node].items()
            if node not in visited_nodes
        ]
        shell_nodes.extend(_in_exploration_order(neighbor_nodes))
        yield current_node, current_edge
195
+
196
+
197
@dataclasses.dataclass
class GraphWalk:
    """A graph walk models a traversal of some graph.

    Each walk begins at a specific *start node* and then follows a *path* along other nodes and edges.

    Notice that depending on the specific use-case the path might deviate from a normal walk. More specifically, two
    special cases might occur:

    1. the edge data can be ``None``. This indicates that the walk jumped to a different node without using an edge. For
       example, this might happen if the walk moved to a different connected component of the graph.
    2. the next node in the walk might not be connected to the current node in the path, but to some node that has
       already been visited instead. This is especially the case for so-called *frontier walks* which can be computed
       using the `nx_frontier_walks` method.

    The walk can be iterated over by its nodes and nodes can be checked for containment in the walk. Length calculation is
    also supported.

    Two walks are equal if they have the same start node and the same path (including the edge data). The hash value is
    derived from the node sequence only, because the edge data typically is an unhashable dictionary.

    Attributes
    ----------
    start_node : NodeType
        The origin of the traversal
    path : Sequence[tuple[NodeType, Optional[dict]]]
        The nodes that have been visited during the traversal, in the order in which they were explored. The dictionary stores
        the NetworkX edge data of the edge that has been used to move to the node. This may be ``None`` if the node was
        "jumped to".
    """

    start_node: NodeType
    path: Sequence[tuple[NodeType, Optional[dict]]] = dataclasses.field(
        default_factory=list
    )

    def nodes(self) -> Sequence[NodeType]:
        """Provides all nodes that are visited by this walk, in the sequence in which they are visited.

        Returns
        -------
        Sequence[NodeType]
            The nodes, beginning with the `start_node`. A new list is created on each call.
        """
        return [self.start_node] + [step[0] for step in self.path]

    def final_node(self) -> NodeType:
        """Provides the very last node that was visited by this walk.

        Returns
        -------
        NodeType
            The last node. This can be the `start_node` if the path is empty.
        """
        return self.start_node if not self.path else self.path[-1][0]

    def expand(
        self, next_node: NodeType, edge_data: Optional[dict] = None
    ) -> GraphWalk:
        """Creates a new walk by prolonging the current one with one more edge at the end.

        Parameters
        ----------
        next_node : NodeType
            The node to move to from the final node of the current graph.
        edge_data : Optional[dict], optional
            The NetworkX edge data for the traversal. Can be ``None`` if the new node is being jumped to.

        Returns
        -------
        GraphWalk
            The resulting larger walk. The original walk is not modified in any way.
        """
        # the path is copied to make sure that the current walk stays untouched
        return GraphWalk(self.start_node, list(self.path) + [(next_node, edge_data)])

    def nodes_hash(self) -> int:
        """Provides a hash value only based on the nodes sequence, not the selected predicates.

        Returns
        -------
        int
            The computed hash value
        """
        return hash(tuple(self.nodes()))

    def __len__(self) -> int:
        # the start node is not part of the path, but still belongs to the walk
        return 1 + len(self.path)

    def __iter__(self) -> Iterator[NodeType]:
        return iter(self.nodes())

    def __contains__(self, other: object) -> bool:
        return other in self.nodes()

    def __hash__(self) -> int:
        # The path entries contain the edge data, which normally is a plain dict. Hashing the path itself therefore
        # raises a TypeError for every walk that actually traversed an edge. Since equal walks always share the same
        # node sequence, hashing just the nodes keeps the hash consistent with __eq__.
        return self.nodes_hash()

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self.start_node == other.start_node
            and self.path == other.path
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return " -> ".join(str(node) for node in self.nodes())
303
+
304
+
305
def _walk_frontier(
    graph: nx.Graph, current_walk: GraphWalk, current_frontier: set[NodeType]
) -> Generator[GraphWalk, None, None]:
    """Recursively prolongs a walk until it covers the entire graph.

    In each step, all edges that lead from one of the frontier nodes to a node that is neither part of the walk nor part
    of the frontier are candidates for the next move. One recursive expansion is started per candidate edge. If no such
    edge exists but the walk is still shorter than the graph, the walk jumps (i.e. moves without edge data) to each node
    outside of the frontier instead. Once no candidate remains and the walk is as long as the graph, the walk is
    produced.

    Parameters
    ----------
    graph : nx.Graph
        The graph to traverse
    current_walk : GraphWalk
        The path that was selected so far
    current_frontier : set[NodeType]
        The nodes whose edges may be used for the next move. The set itself is never modified, each recursive call
        receives a copy that additionally contains the node that was just moved to.

    Yields
    ------
    Generator[GraphWalk, None, None]
        All completed walks that prolong `current_walk`
    """
    candidate_moves = [
        (neighbor, edge_data)
        for frontier_node in current_frontier
        for neighbor, edge_data in graph.adj[frontier_node].items()
        if neighbor not in current_walk and neighbor not in current_frontier
    ]

    if candidate_moves:
        for neighbor, edge_data in candidate_moves:
            longer_walk = current_walk.expand(neighbor, edge_data)
            yield from _walk_frontier(graph, longer_walk, current_frontier | {neighbor})
        return

    if len(current_walk) >= len(graph):
        # nothing left to explore: the walk is complete
        yield current_walk
        return

    # the reachable part of the graph is exhausted, but unexplored nodes remain: jump to them without using an edge
    jump_targets = [node for node in graph.nodes if node not in current_frontier]
    for jump_target in jump_targets:
        longer_walk = current_walk.expand(jump_target)
        yield from _walk_frontier(graph, longer_walk, current_frontier | {jump_target})
356
+
357
+
358
def nx_frontier_walks(graph: nx.Graph) -> Generator[GraphWalk, None, None]:
    """Enumerates all frontier walks over a graph, using each of its nodes as the start node once.

    In contrast to a normal walk, which only follows the edges of the node it currently resides on, a *frontier walk*
    keeps track of all nodes that it has already visited. These nodes form the *frontier* of the walk. The next node can
    be reached via any edge of any frontier node. Nodes that have already been visited are never visited again.

    In addition, the walks can jump to another partition of the graph. This happens if all nodes of the current connected
    component have been visited, but there are still unexplored nodes left.

    Parameters
    ----------
    graph : nx.Graph
        The graph to traverse

    Yields
    ------
    Generator[GraphWalk, None, None]
        All frontier walks over the graph

    Notes
    -----

    Walks that visit the same sequence of nodes are still considered different walks if they use different edges to do
    so.

    For example, consider this fully-connected graph:

    ::
         a
        / \\
       b - c


    Among many other sequences, the walk *a* -> *b* -> *c* is produced twice:
    once with *c* being reached via the edge *a* -> *c*, and once with *c* being reached via the edge *b* -> *c*.
    """
    for origin in graph.nodes:
        initial_walk = GraphWalk(origin)
        initial_frontier = {origin}
        yield from _walk_frontier(
            graph, initial_walk, current_frontier=initial_frontier
        )