PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
"""Provides graph-centric algorithms based on NetworkX [nx]_.
|
|
2
|
+
|
|
3
|
+
References
|
|
4
|
+
----------
|
|
5
|
+
|
|
6
|
+
.. [nx] Aric A. Hagberg, Daniel A. Schult and Pieter J. Swart, "Exploring network structure, dynamics, and function using
|
|
7
|
+
NetworkX", in Proceedings of the 7th Python in Science Conference (SciPy2008), Gaël Varoquaux, Travis Vaught, and
|
|
8
|
+
Jarrod Millman (Eds), (Pasadena, CA USA), pp. 11-15, Aug 2008
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import dataclasses
|
|
14
|
+
import random
|
|
15
|
+
import typing
|
|
16
|
+
from collections.abc import (
|
|
17
|
+
Callable,
|
|
18
|
+
Collection,
|
|
19
|
+
Generator,
|
|
20
|
+
Iterable,
|
|
21
|
+
Iterator,
|
|
22
|
+
Sequence,
|
|
23
|
+
)
|
|
24
|
+
from typing import Optional
|
|
25
|
+
|
|
26
|
+
import networkx as nx
|
|
27
|
+
|
|
28
|
+
from .collections import Queue
|
|
29
|
+
|
|
30
|
+
NodeType = typing.TypeVar("NodeType")
|
|
31
|
+
"""Generic type to model the specific nodes contained in a NetworkX graph."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def nx_sinks(graph: nx.DiGraph) -> Collection[NodeType]:
    """Collects the sink nodes of a directed graph.

    A node counts as a sink if its out-degree is zero, i.e. no edge of the graph starts at it.

    Parameters
    ----------
    graph : nx.DiGraph
        The directed graph to inspect

    Returns
    -------
    Collection[NodeType]
        The sinks, in the iteration order of ``graph.nodes``. Empty if the graph does not contain any sink.
    """
    sinks = []
    for candidate in graph.nodes:
        if graph.out_degree(candidate) != 0:
            continue
        sinks.append(candidate)
    return sinks
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def nx_sources(graph: nx.DiGraph) -> Collection[NodeType]:
    """Collects the source nodes of a directed graph.

    A node counts as a source if its in-degree is zero, i.e. no edge of the graph ends at it.

    Parameters
    ----------
    graph : nx.DiGraph
        The directed graph to inspect

    Returns
    -------
    Collection[NodeType]
        The sources, in the iteration order of ``graph.nodes``. Empty if the graph does not contain any source.
    """
    sources = []
    for candidate in graph.nodes:
        if graph.in_degree(candidate) != 0:
            continue
        sources.append(candidate)
    return sources
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def nx_filter_nodes(
    graph: nx.Graph, predicate: Callable[[NodeType, dict], bool]
) -> Collection[tuple[NodeType, dict]]:
    """Selects all nodes of a graph that satisfy a predicate.

    Parameters
    ----------
    graph : nx.Graph
        The graph whose nodes should be filtered
    predicate : Callable[[NodeType, dict], bool]
        Decides whether a node is retained. It is called with each pair produced by ``graph.nodes.data()``, i.e. the node
        and the data dictionary that is stored alongside it.

    Returns
    -------
    Collection[tuple[NodeType, dict]]
        The matching nodes along with their data, in the iteration order of ``graph.nodes.data()``. Can be empty.
    """
    matches = []
    for node, node_data in graph.nodes.data():
        if predicate(node, node_data):
            matches.append((node, node_data))
    return matches
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def nx_merge_nodes(
    graph: nx.Graph, nodes: Iterable[NodeType], *, target_node: NodeType
) -> nx.Graph:
    """Placeholder for a node-merging utility. Currently not implemented.

    The body consists of a bare ``pass``: all arguments are ignored and the call evaluates to ``None`` rather than the
    ``nx.Graph`` that is promised by the signature.

    NOTE(review): judging by the name and the parameters, this is presumably meant to collapse `nodes` into `target_node` -
    TODO confirm the intended semantics before implementing or relying on this function.
    """
    pass
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def nx_random_walk(
    graph: nx.Graph, *, starting_node: Optional[NodeType] = None
) -> Generator[NodeType, None, None]:
    """A modified random walk implementation for NetworkX graphs.

    A random walk starts at any of the nodes of the graph. At each iteration, a neighboring node is selected and moved to.
    Afterwards, the iteration continues with that node.

    Our implementation uses the following modifications: after each stop, the walk may jump to a node that is connected to
    one of the visited nodes. This node does not necessarily have to be connected to the current node. Secondly, if the
    graph contains multiple connected components, the walk will first explore one component before jumping to the next
    one.

    The walk finishes when all nodes have been explored. Each node is produced exactly once. Walking over an empty graph
    without an explicit starting node produces an empty walk.

    Parameters
    ----------
    graph : nx.Graph
        The graph to walk over
    starting_node : Optional[NodeType], optional
        The node where the walk starts. If unspecified, a random node is selected.

    Yields
    ------
    Generator[NodeType, None, None]
        The nodes in the order in which they have been moved to.
    """
    # TODO: could be refactored to use the GraphWalk class instead
    total_n_nodes = len(graph.nodes)
    if starting_node is None and total_n_nodes == 0:
        # There is nothing to pick a start from: random.choice() would fail with an IndexError on the empty node list.
        return

    # shell_nodes: unvisited nodes that are adjacent to at least one visited node (the candidates for the next move)
    shell_nodes = set()
    visited_nodes = set()

    current_node = (
        random.choice(list(graph.nodes)) if starting_node is None else starting_node
    )
    visited_nodes.add(current_node)
    yield current_node

    while len(visited_nodes) < total_n_nodes:
        shell_nodes |= {
            n for n in graph.adj[current_node].keys() if n not in visited_nodes
        }
        if not shell_nodes:
            # we have multiple connected components and need to jump into the other component
            current_node = random.choice(
                [n for n in graph.nodes if n not in visited_nodes]
            )
            visited_nodes.add(current_node)
            yield current_node
            continue

        current_node = random.choice(list(shell_nodes))
        shell_nodes.remove(current_node)
        visited_nodes.add(current_node)
        yield current_node
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def nx_bfs_tree(
    graph: nx.Graph,
    start_node: NodeType,
    condition: Callable[[NodeType, dict], bool],
    *,
    node_order: Callable[[NodeType, dict], int] | None = None,
) -> Generator[tuple[NodeType, dict], None, None]:
    """Traverses a specific graph in breadth-first manner, yielding its nodes along the way.

    The traversal starts at a specific start node. During the traversal all nodes that match a condition are provided. If no
    more nodes are found or the condition cannot be satisfied for any more nodes, traversal terminates.

    Notice that the neighbors of a node are only scheduled for exploration if the node itself satisfies the condition. Nodes
    that can only be reached via nodes that fail the condition check are therefore never provided.

    Parameters
    ----------
    graph : nx.Graph
        The graph to explore
    start_node : NodeType
        The node where the exploration starts. This node will never be yielded.
    condition : Callable[[NodeType, dict], bool]
        A condition that is satisfied by all nodes that should be yielded. It receives the node as well as the data of the
        edge that was used to reach it.
    node_order : Callable[[NodeType, dict], int] | None, optional
        The sequence in which child nodes should be explored. This function receives the child node as well as the edge from
        its parent as arguments and produces a numerical position value as output (lower values indicate earlier yielding).
        If unspecified, the children are explored in the iteration order of the adjacency of their parent.

    Yields
    ------
    Generator[tuple[NodeType, dict], None, None]
        The node along with their edge data from the parent.

    See Also
    --------

    .. NetworkX documentation on usage and definition of edge data:
       https://networkx.org/documentation/stable/reference/introduction.html#nodes-and-edges
    """
    shell_nodes = Queue([(node, edge) for node, edge in graph.adj[start_node].items()])
    visited_nodes = {start_node}
    while shell_nodes:
        current_node, current_edge = shell_nodes.pop()
        # NOTE(review): nodes are only marked as visited once they are taken from the queue. A node that is adjacent to
        # several explored nodes can hence be scheduled (and yielded) once per edge, presuming that Queue does not
        # de-duplicate its entries - confirm whether this is intended.
        visited_nodes.add(current_node)
        if condition(current_node, current_edge):
            neighbor_nodes = [
                (node, edge)
                for node, edge in graph.adj[current_node].items()
                if node not in visited_nodes
            ]
            if node_order:
                # sort in place: sorted() returns a new list, so ignoring its result would leave the order untouched
                neighbor_nodes.sort(
                    key=lambda neighbor: node_order(neighbor[0], neighbor[1])
                )
            shell_nodes.extend(neighbor_nodes)
            yield current_node, current_edge
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@dataclasses.dataclass
class GraphWalk:
    """A graph walk models a traversal of some graph.

    Each walk begins at a specific *start node* and then follows a *path* along other nodes and edges.

    Notice that depending on the specific use-case the path might deviate from a normal walk. More specifically, two
    special cases might occur:

    1. the edge data can be ``None``. This indicates that the walk jumped to a different node without using an edge. For
       example, this might happen if the walk moved to a different connected component of the graph.
    2. the next node in the walk might not be connected to the current node in the path, but to some node that has
       already been visited instead. This is especially the case for so-called *frontier walks* which can be computed
       using the `nx_frontier_walks` method.

    The walk can be iterated over by its nodes and nodes can be checked for containment in the walk. Length calculation is
    also supported. Walks can be compared and hashed. Two walks are equal if they share the same start node and the same
    path (including the edge data).

    Attributes
    ----------
    start_node : NodeType
        The origin of the traversal
    path : Sequence[tuple[NodeType, Optional[dict]]]
        The nodes that have been visited during the traversal, in the order in which they were explored. The dictionary stores
        the NetworkX edge data of the edge that has been used to move to the node. This may be ``None`` if the node was
        "jumped to".
    """

    start_node: NodeType
    path: Sequence[tuple[NodeType, Optional[dict]]] = dataclasses.field(
        default_factory=list
    )

    def nodes(self) -> Sequence[NodeType]:
        """Provides all nodes that are visited by this walk, in the sequence in which they are visited.

        Returns
        -------
        Sequence[NodeType]
            The nodes, beginning with the `start_node`
        """
        return [self.start_node] + [node for node, _ in self.path]

    def final_node(self) -> NodeType:
        """Provides the very last node that was visited by this walk.

        Returns
        -------
        NodeType
            The last node. This can be the `start_node` if the path is empty.
        """
        return self.start_node if not self.path else self.path[-1][0]

    def expand(
        self, next_node: NodeType, edge_data: Optional[dict] = None
    ) -> GraphWalk:
        """Creates a new walk by prolonging the current one with one more edge at the end.

        Parameters
        ----------
        next_node : NodeType
            The node to move to from the final node of the current walk.
        edge_data : Optional[dict], optional
            The NetworkX edge data for the traversal. Can be ``None`` if the new node is being jumped to.

        Returns
        -------
        GraphWalk
            The resulting larger walk. The original walk is not modified in any way.
        """
        return GraphWalk(self.start_node, list(self.path) + [(next_node, edge_data)])

    def nodes_hash(self) -> int:
        """Provides a hash value only based on the nodes sequence, not the selected predicates.

        Returns
        -------
        int
            The computed hash value
        """
        return hash(tuple(self.nodes()))

    @staticmethod
    def _freeze_edge_data(edge_data: Optional[dict]) -> Optional[frozenset]:
        """Converts edge data into a hashable stand-in. ``None`` is used if the data cannot be hashed."""
        if edge_data is None:
            return None
        try:
            # building the frozenset hashes all items, which fails for unhashable attribute values
            return frozenset(edge_data.items())
        except TypeError:
            return None

    def __len__(self) -> int:
        # the start node is not part of the path, but still part of the walk
        return 1 + len(self.path)

    def __iter__(self) -> Iterator[NodeType]:
        return iter(self.nodes())

    def __contains__(self, other: object) -> bool:
        return other in self.nodes()

    def __hash__(self) -> int:
        # Edge data are plain dictionaries and cannot be hashed directly. Hashing the raw path would raise a TypeError
        # for each walk that traversed an actual edge. Therefore, we hash a frozen version of the edge data instead.
        # Equal walks have equal edge data and thus still receive the same hash value.
        frozen_path = tuple(
            (node, self._freeze_edge_data(edge_data)) for node, edge_data in self.path
        )
        return hash((self.start_node, frozen_path))

    def __eq__(self, other: object) -> bool:
        # paths are normalized to lists to make sure that the container type does not influence the comparison
        return (
            isinstance(other, type(self))
            and self.start_node == other.start_node
            and list(self.path) == list(other.path)
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return " -> ".join(str(node) for node in self.nodes())
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _walk_frontier(
    graph: nx.Graph, current_walk: GraphWalk, current_frontier: set[NodeType]
) -> Generator[GraphWalk, None, None]:
    """Worker method to recursively expand a graph traversal by one more node at a time.

    Each invocation considers all edges that lead from one of the nodes in `current_frontier` to a node that is neither
    part of the walk, nor of the frontier. For each such edge, the walk is prolonged by the target node and the expansion
    continues recursively.

    If no such edge exists, but the walk does not yet span the entire graph, the walk jumps (i.e. moves without using an
    edge) to each of the nodes outside of the frontier instead. This happens if the graph contains multiple connected
    components. Once the walk cannot be prolonged any more, it is provided as a result.

    Only paths of unique nodes are considered - if a node has already been visited, it will not be visited again.

    Parameters
    ----------
    graph : nx.Graph
        The graph to traverse
    current_walk : GraphWalk
        The path that was already selected
    current_frontier : set[NodeType]
        The nodes whose edges can be used for the next move. `nx_frontier_walks` initializes it with the start node and
        each recursive call receives a copy that is enlarged by the node that was just moved to. The set that is passed
        in is never modified.

    Yields
    ------
    Generator[GraphWalk, None, None]
        All unique walks over the complete graph
    """
    candidate_moves = [
        (neighbor, edge)
        for origin in current_frontier
        for neighbor, edge in graph.adj[origin].items()
        if neighbor not in current_walk and neighbor not in current_frontier
    ]

    if candidate_moves:
        for neighbor, edge in candidate_moves:
            longer_walk = current_walk.expand(neighbor, edge)
            yield from _walk_frontier(graph, longer_walk, current_frontier | {neighbor})
        return

    if len(current_walk) >= len(graph):
        # all nodes have been visited, the walk is complete
        yield current_walk
        return

    # the current component is exhausted, but unexplored nodes remain: continue in a different component
    unexplored = [node for node in graph.nodes if node not in current_frontier]
    for jump_target in unexplored:
        longer_walk = current_walk.expand(jump_target)
        yield from _walk_frontier(graph, longer_walk, current_frontier | {jump_target})
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def nx_frontier_walks(graph: nx.Graph) -> Generator[GraphWalk, None, None]:
    """Provides all possible frontier walks over a specific graph.

    A *frontier walk* generalizes the normal walk over a graph: a normal walk moves from node to node in a local fashion,
    i.e. the next node is always selected from the edges of the current node. In contrast, a frontier walk keeps track of
    all nodes that it has visited so far. These nodes form the *frontier* of the walk. The next node can be reached via
    any edge of any node in the frontier.

    Nodes that have already been visited are never visited again.

    Our implementation additionally allows jumps to other partitions of the graph. A jump takes place once all nodes of
    the current connected component have been visited while unexplored nodes remain.

    Each node of the graph is used as the start node of the walks once.

    Parameters
    ----------
    graph : nx.Graph
        The graph to traverse

    Yields
    ------
    Generator[GraphWalk, None, None]
        All frontier walks over the graph

    Notes
    -----

    Walks are already distinguished if they differ in the traversed edges, even if they visit the same sequence of nodes.

    For example, consider this fully-connected graph:

    ::

          a
         / \\
        b - c

    The frontier walks produced by this function will include the sequence *a* -> *b* -> *c* twice (among many other
    sequences):
    Once by traversing the edge *a* -> *c* to reach *c*, and once by traversing the edge *b* -> *c* to reach *c* again.
    """
    for origin in graph.nodes:
        initial_walk = GraphWalk(origin)
        yield from _walk_frontier(graph, initial_walk, current_frontier={origin})
|