cbrkit 0.26.1__tar.gz → 0.26.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cbrkit-0.26.1 → cbrkit-0.26.2}/PKG-INFO +1 -1
- {cbrkit-0.26.1 → cbrkit-0.26.2}/pyproject.toml +1 -1
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/__init__.py +3 -1
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/astar.py +0 -4
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/common.py +40 -51
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/dfs.py +4 -6
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/greedy.py +0 -6
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/lap.py +67 -14
- cbrkit-0.26.2/src/cbrkit/sim/graphs/precompute.py +80 -0
- cbrkit-0.26.2/src/cbrkit/sim/graphs/qap.py +145 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/vf2.py +143 -35
- cbrkit-0.26.1/src/cbrkit/sim/graphs/precompute.py +0 -56
- cbrkit-0.26.1/src/cbrkit/sim/graphs/qap.py +0 -118
- {cbrkit-0.26.1 → cbrkit-0.26.2}/README.md +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/__main__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/attribute_value.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/generic.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/numbers.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/strings.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/api.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/cli.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/constants.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/cycle.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/dumpers.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/eval/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/eval/common.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/eval/retrieval.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/helpers.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/loaders.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/model/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/model/graph.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/model/result.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/py.typed +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/apply.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/build.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/rerank.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/reuse/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/reuse/apply.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/reuse/build.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/aggregator.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/attribute_value.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/collections.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/embed.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/generic.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/alignment.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/brute_force.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/numbers.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/strings.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/taxonomy.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/wrappers.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/apply.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/build.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/model.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/prompts.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/__init__.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/anthropic.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/cohere.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/google.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/instructor.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/model.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/ollama.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/openai.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
- {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/typing.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: cbrkit
|
|
3
|
-
Version: 0.26.1
|
|
3
|
+
Version: 0.26.2
|
|
4
4
|
Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
|
|
5
5
|
Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
|
|
6
6
|
Author: Mirko Lenz
|
|
@@ -15,7 +15,7 @@ from .common import (
|
|
|
15
15
|
from .greedy import greedy
|
|
16
16
|
from .lap import lap
|
|
17
17
|
from .precompute import precompute
|
|
18
|
-
from .vf2 import vf2
|
|
18
|
+
from .vf2 import vf2, vf2_networkx, vf2_rustworkx
|
|
19
19
|
|
|
20
20
|
with optional_dependencies():
|
|
21
21
|
from .alignment import dtw
|
|
@@ -34,6 +34,8 @@ __all__ = [
|
|
|
34
34
|
"lap",
|
|
35
35
|
"precompute",
|
|
36
36
|
"vf2",
|
|
37
|
+
"vf2_networkx",
|
|
38
|
+
"vf2_rustworkx",
|
|
37
39
|
"dtw",
|
|
38
40
|
"smith_waterman",
|
|
39
41
|
"init_empty",
|
|
@@ -363,10 +363,6 @@ class build[K, N, E, G](
|
|
|
363
363
|
x: Graph[K, N, E, G],
|
|
364
364
|
y: Graph[K, N, E, G],
|
|
365
365
|
) -> GraphSim[K]:
|
|
366
|
-
# if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
|
|
367
|
-
# self_inv = dataclasses.replace(self, _invert=True)
|
|
368
|
-
# return self.invert_similarity(x, y, self_inv(x=y, y=x))
|
|
369
|
-
|
|
370
366
|
node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
|
|
371
367
|
|
|
372
368
|
open_set: list[PriorityState[K]] = []
|
|
@@ -8,12 +8,11 @@ from frozendict import frozendict
|
|
|
8
8
|
|
|
9
9
|
from ...helpers import (
|
|
10
10
|
batchify_sim,
|
|
11
|
-
reverse_batch_positional,
|
|
12
|
-
reverse_positional,
|
|
13
11
|
total_params,
|
|
14
12
|
unpack_float,
|
|
13
|
+
unpack_floats,
|
|
15
14
|
)
|
|
16
|
-
from ...model.graph import
|
|
15
|
+
from ...model.graph import Graph, Node
|
|
17
16
|
from ...typing import AnySimFunc, BatchSimFunc, Float, SimFunc, StructuredValue
|
|
18
17
|
from ..wrappers import transpose_value
|
|
19
18
|
|
|
@@ -38,26 +37,38 @@ def default_element_matcher(x: Any, y: Any) -> bool:
|
|
|
38
37
|
|
|
39
38
|
@dataclass(slots=True, frozen=True)
|
|
40
39
|
class SemanticEdgeSim[K, N, E]:
|
|
41
|
-
source_weight: float = 0
|
|
42
|
-
target_weight: float = 0
|
|
40
|
+
source_weight: float = 1.0
|
|
41
|
+
target_weight: float = 1.0
|
|
42
|
+
edge_sim_func: AnySimFunc[E, Float] | None = None
|
|
43
43
|
|
|
44
44
|
def __call__(
|
|
45
45
|
self,
|
|
46
|
-
batches: Sequence[tuple[
|
|
46
|
+
batches: Sequence[tuple[E, E, float, float]],
|
|
47
47
|
) -> list[float]:
|
|
48
|
-
source_sims = (
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
48
|
+
source_sims = (source_sim for _, _, source_sim, _ in batches)
|
|
49
|
+
target_sims = (target_sim for _, _, _, target_sim in batches)
|
|
50
|
+
|
|
51
|
+
if self.edge_sim_func is not None:
|
|
52
|
+
edge_sim_func = batchify_sim(self.edge_sim_func)
|
|
53
|
+
edge_sims = unpack_floats(
|
|
54
|
+
edge_sim_func(
|
|
55
|
+
[(x, y) for x, y, _, _ in batches],
|
|
56
|
+
)
|
|
57
|
+
)
|
|
58
|
+
else:
|
|
59
|
+
edge_sims = [1.0] * len(batches)
|
|
60
|
+
|
|
61
|
+
scaling_factor = self.source_weight + self.target_weight
|
|
62
|
+
|
|
63
|
+
if scaling_factor == 0:
|
|
64
|
+
return edge_sims
|
|
56
65
|
|
|
57
66
|
return [
|
|
58
|
-
(
|
|
59
|
-
|
|
60
|
-
for source, target in zip(
|
|
67
|
+
(edge * source * self.source_weight / scaling_factor)
|
|
68
|
+
+ (edge * target * self.target_weight / scaling_factor)
|
|
69
|
+
for source, target, edge in zip(
|
|
70
|
+
source_sims, target_sims, edge_sims, strict=True
|
|
71
|
+
)
|
|
61
72
|
]
|
|
62
73
|
|
|
63
74
|
|
|
@@ -82,37 +93,14 @@ def _induced_edge_mapping[K, N, E, G](
|
|
|
82
93
|
@dataclass(slots=True)
|
|
83
94
|
class BaseGraphSimFunc[K, N, E, G]:
|
|
84
95
|
node_sim_func: AnySimFunc[N, Float]
|
|
85
|
-
edge_sim_func:
|
|
86
|
-
default_edge_sim
|
|
87
|
-
)
|
|
96
|
+
edge_sim_func: SemanticEdgeSim[K, N, E] = default_edge_sim
|
|
88
97
|
node_matcher: ElementMatcher[N] = default_element_matcher
|
|
89
98
|
edge_matcher: ElementMatcher[E] = default_element_matcher
|
|
90
99
|
batch_node_sim_func: BatchSimFunc[Node[K, N], Float] = field(init=False)
|
|
91
|
-
batch_edge_sim_func: (
|
|
92
|
-
BatchSimFunc[Edge[K, N, E], Float] | SemanticEdgeSim[K, N, E]
|
|
93
|
-
) = field(init=False)
|
|
94
|
-
_invert: bool = False
|
|
95
100
|
|
|
96
101
|
def __post_init__(self) -> None:
|
|
97
102
|
self.batch_node_sim_func = batchify_sim(transpose_value(self.node_sim_func))
|
|
98
103
|
|
|
99
|
-
if isinstance(self.edge_sim_func, SemanticEdgeSim):
|
|
100
|
-
self.batch_edge_sim_func = self.edge_sim_func
|
|
101
|
-
else:
|
|
102
|
-
self.batch_edge_sim_func = batchify_sim(self.edge_sim_func)
|
|
103
|
-
|
|
104
|
-
if self._invert:
|
|
105
|
-
self.node_matcher = reverse_positional(self.node_matcher)
|
|
106
|
-
self.edge_matcher = reverse_positional(self.edge_matcher)
|
|
107
|
-
self.batch_node_sim_func = reverse_batch_positional(
|
|
108
|
-
self.batch_node_sim_func
|
|
109
|
-
)
|
|
110
|
-
if not isinstance(self.batch_edge_sim_func, SemanticEdgeSim):
|
|
111
|
-
# semantic edge sim is agnostic to order
|
|
112
|
-
self.batch_edge_sim_func = reverse_batch_positional(
|
|
113
|
-
self.batch_edge_sim_func
|
|
114
|
-
)
|
|
115
|
-
|
|
116
104
|
def induced_edge_mapping(
|
|
117
105
|
self,
|
|
118
106
|
x: Graph[K, N, E, G],
|
|
@@ -163,16 +151,17 @@ class BaseGraphSimFunc[K, N, E, G]:
|
|
|
163
151
|
]
|
|
164
152
|
|
|
165
153
|
edge_pair_values = [(x.edges[x_key], y.edges[y_key]) for y_key, x_key in pairs]
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
154
|
+
edge_pair_sims = self.edge_sim_func(
|
|
155
|
+
[
|
|
156
|
+
(
|
|
157
|
+
x_edge.value,
|
|
158
|
+
y_edge.value,
|
|
159
|
+
node_pair_sims[(y_edge.source.key, x_edge.source.key)],
|
|
160
|
+
node_pair_sims[(y_edge.target.key, x_edge.target.key)],
|
|
161
|
+
)
|
|
162
|
+
for x_edge, y_edge in edge_pair_values
|
|
163
|
+
]
|
|
164
|
+
)
|
|
176
165
|
|
|
177
166
|
return {
|
|
178
167
|
(y_edge.key, x_edge.key): unpack_float(sim)
|
|
@@ -11,8 +11,6 @@ from .common import BaseGraphSimFunc, GraphSim
|
|
|
11
11
|
|
|
12
12
|
logger = get_logger(__name__)
|
|
13
13
|
|
|
14
|
-
__all__ = ["dfs"]
|
|
15
|
-
|
|
16
14
|
|
|
17
15
|
class RootsFunc[K, N, E, G](Protocol):
|
|
18
16
|
"""Support for matching rooted graphs
|
|
@@ -37,10 +35,10 @@ with optional_dependencies():
|
|
|
37
35
|
class dfs[K, N, E, G](
|
|
38
36
|
BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
|
|
39
37
|
):
|
|
40
|
-
node_del_cost: float =
|
|
41
|
-
node_ins_cost: float =
|
|
42
|
-
edge_del_cost: float =
|
|
43
|
-
edge_ins_cost: float =
|
|
38
|
+
node_del_cost: float = 1.0
|
|
39
|
+
node_ins_cost: float = 1.0
|
|
40
|
+
edge_del_cost: float = 1.0
|
|
41
|
+
edge_ins_cost: float = 1.0
|
|
44
42
|
max_iterations: int = 0
|
|
45
43
|
upper_bound: float | None = None
|
|
46
44
|
strictly_decreasing: bool = True
|
|
@@ -11,8 +11,6 @@ from .common import GraphSim, SearchGraphSimFunc, SearchState
|
|
|
11
11
|
|
|
12
12
|
logger = get_logger(__name__)
|
|
13
13
|
|
|
14
|
-
__all__ = ["greedy"]
|
|
15
|
-
|
|
16
14
|
|
|
17
15
|
@dataclass(slots=True)
|
|
18
16
|
class greedy[K, N, E, G](
|
|
@@ -35,10 +33,6 @@ class greedy[K, N, E, G](
|
|
|
35
33
|
x: Graph[K, N, E, G],
|
|
36
34
|
y: Graph[K, N, E, G],
|
|
37
35
|
) -> GraphSim[K]:
|
|
38
|
-
# if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
|
|
39
|
-
# self_inv = dataclasses.replace(self, _invert=True)
|
|
40
|
-
# return self.invert_similarity(x, y, self_inv(x=y, y=x))
|
|
41
|
-
|
|
42
36
|
node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
|
|
43
37
|
|
|
44
38
|
current_state = self.init_search_state(x, y)
|
|
@@ -14,18 +14,17 @@ from .common import BaseGraphSimFunc, GraphSim, PairSim
|
|
|
14
14
|
|
|
15
15
|
logger = get_logger(__name__)
|
|
16
16
|
|
|
17
|
-
__all__ = ["lap"]
|
|
18
|
-
|
|
19
17
|
|
|
20
18
|
# https://jack.valmadre.net/notes/2020/12/08/non-perfect-linear-assignment/
|
|
21
19
|
@dataclass(slots=True)
|
|
22
20
|
class lap[K, N, E, G](
|
|
23
21
|
BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
|
|
24
22
|
):
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
23
|
+
greedy: bool = True
|
|
24
|
+
node_del_cost: float = 1.0
|
|
25
|
+
node_ins_cost: float = 1.0
|
|
26
|
+
edge_del_cost: float = 1.0
|
|
27
|
+
edge_ins_cost: float = 1.0
|
|
29
28
|
# 1.0 gives an upper bound, 0.5 gives a lower bound
|
|
30
29
|
# approximation is better with a lower bound
|
|
31
30
|
# since we compute real edit costs at the end anyway,
|
|
@@ -38,7 +37,7 @@ class lap[K, N, E, G](
|
|
|
38
37
|
e.key for e in g.edges.values() if n == e.source.key or n == e.target.key
|
|
39
38
|
}
|
|
40
39
|
|
|
41
|
-
def
|
|
40
|
+
def edge_sub_cost_greedy(
|
|
42
41
|
self,
|
|
43
42
|
x: Graph[K, N, E, G],
|
|
44
43
|
y: Graph[K, N, E, G],
|
|
@@ -46,9 +45,60 @@ class lap[K, N, E, G](
|
|
|
46
45
|
y_node: K,
|
|
47
46
|
edge_pair_sims: PairSim[K],
|
|
48
47
|
) -> float:
|
|
48
|
+
"""BranchFast algorithm without solving an inner LAP problem.
|
|
49
|
+
|
|
50
|
+
- Substitutions are taken greedily in descending similarity order
|
|
51
|
+
- Unmatched y‑edges are deletions, unmatched x‑edges are insertions.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
y_edges = list(self.connected_edges(y, y_node))
|
|
55
|
+
x_edges = list(self.connected_edges(x, x_node))
|
|
56
|
+
|
|
57
|
+
# trivial fast‑path
|
|
58
|
+
if not y_edges and not x_edges:
|
|
59
|
+
return 0.0
|
|
60
|
+
|
|
61
|
+
# candidate substitutions: (cost, y_key, x_key)
|
|
62
|
+
candidates: list[tuple[float, K, K]] = [
|
|
63
|
+
(1.0 - edge_pair_sims[(y_key, x_key)], y_key, x_key)
|
|
64
|
+
for y_key, x_key in itertools.product(y_edges, x_edges)
|
|
65
|
+
if (y_key, x_key) in edge_pair_sims
|
|
66
|
+
]
|
|
67
|
+
# sort by cheapest cost ==> highest similarity first
|
|
68
|
+
candidates.sort(key=lambda t: t[0])
|
|
69
|
+
|
|
70
|
+
matched_y: set[K] = set()
|
|
71
|
+
matched_x: set[K] = set()
|
|
72
|
+
cost = 0.0
|
|
73
|
+
|
|
74
|
+
for c, y_key, x_key in candidates:
|
|
75
|
+
if y_key not in matched_y and x_key not in matched_x:
|
|
76
|
+
matched_y.add(y_key)
|
|
77
|
+
matched_x.add(x_key)
|
|
78
|
+
cost += c # substitution cost
|
|
79
|
+
|
|
80
|
+
# remaining deletions / insertions
|
|
81
|
+
cost += (len(y_edges) - len(matched_y)) * self.edge_del_cost
|
|
82
|
+
cost += (len(x_edges) - len(matched_x)) * self.edge_ins_cost
|
|
83
|
+
|
|
84
|
+
return cost
|
|
85
|
+
|
|
86
|
+
def edge_sub_cost_optimal(
|
|
87
|
+
self,
|
|
88
|
+
x: Graph[K, N, E, G],
|
|
89
|
+
y: Graph[K, N, E, G],
|
|
90
|
+
x_node: K,
|
|
91
|
+
y_node: K,
|
|
92
|
+
edge_pair_sims: PairSim[K],
|
|
93
|
+
) -> float:
|
|
94
|
+
"""Branch algorithm solving an inner LAP problem."""
|
|
95
|
+
|
|
49
96
|
y_edges = self.connected_edges(y, y_node)
|
|
50
97
|
x_edges = self.connected_edges(x, x_node)
|
|
51
98
|
|
|
99
|
+
if not y_edges and not x_edges:
|
|
100
|
+
return 0.0
|
|
101
|
+
|
|
52
102
|
rows = len(y_edges)
|
|
53
103
|
cols = len(x_edges)
|
|
54
104
|
dim = rows + cols
|
|
@@ -126,13 +176,16 @@ class lap[K, N, E, G](
|
|
|
126
176
|
and (sim := node_pair_sims.get((y_key, x_key)))
|
|
127
177
|
):
|
|
128
178
|
node_sub_cost = 1.0 - sim
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
179
|
+
|
|
180
|
+
if self.greedy:
|
|
181
|
+
edge_sub_cost = self.edge_sub_cost_greedy(
|
|
182
|
+
x, y, x_key, y_key, edge_pair_sims
|
|
183
|
+
)
|
|
184
|
+
else:
|
|
185
|
+
edge_sub_cost = self.edge_sub_cost_optimal(
|
|
186
|
+
x, y, x_key, y_key, edge_pair_sims
|
|
187
|
+
)
|
|
188
|
+
|
|
136
189
|
cost[r, c] = node_sub_cost + (self.edge_edit_factor * edge_sub_cost)
|
|
137
190
|
|
|
138
191
|
if self.print_matrix:
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from collections.abc import Sequence
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from cbrkit.helpers import chain_map_chunks, unpack_float
|
|
6
|
+
|
|
7
|
+
from ...model.graph import (
|
|
8
|
+
Graph,
|
|
9
|
+
Node,
|
|
10
|
+
)
|
|
11
|
+
from ...typing import BatchSimFunc
|
|
12
|
+
from .common import BaseGraphSimFunc
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(slots=True)
|
|
16
|
+
class precompute[K, N, E, G](
|
|
17
|
+
BaseGraphSimFunc[K, N, E, G], BatchSimFunc[Graph[K, N, E, G], float]
|
|
18
|
+
):
|
|
19
|
+
precompute_nodes: bool = True
|
|
20
|
+
precompute_edges: bool = True
|
|
21
|
+
|
|
22
|
+
def __call__(
|
|
23
|
+
self, batches: Sequence[tuple[Graph[K, N, E, G], Graph[K, N, E, G]]]
|
|
24
|
+
) -> list[float]:
|
|
25
|
+
precompute_edges = (
|
|
26
|
+
self.precompute_edges and self.edge_sim_func.edge_sim_func is not None
|
|
27
|
+
)
|
|
28
|
+
batch_node_pair_sims: list[dict[tuple[K, K], float]] = []
|
|
29
|
+
|
|
30
|
+
if self.precompute_nodes or precompute_edges:
|
|
31
|
+
batch_node_pairs: list[list[tuple[Node[K, N], Node[K, N]]]] = [
|
|
32
|
+
[
|
|
33
|
+
(x_node, y_node)
|
|
34
|
+
for x_node, y_node in itertools.product(
|
|
35
|
+
x.nodes.values(), y.nodes.values()
|
|
36
|
+
)
|
|
37
|
+
if self.node_matcher(x_node.value, y_node.value)
|
|
38
|
+
]
|
|
39
|
+
for x, y in batches
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
batch_node_pair_sims_list = chain_map_chunks(
|
|
43
|
+
batch_node_pairs, self.batch_node_sim_func
|
|
44
|
+
)
|
|
45
|
+
batch_node_pair_sims = [
|
|
46
|
+
{
|
|
47
|
+
(y_node.key, x_node.key): unpack_float(sim)
|
|
48
|
+
for (x_node, y_node), sim in zip(
|
|
49
|
+
node_pair_values, node_pair_sims, strict=True
|
|
50
|
+
)
|
|
51
|
+
}
|
|
52
|
+
for node_pair_values, node_pair_sims in zip(
|
|
53
|
+
batch_node_pairs, batch_node_pair_sims_list, strict=True
|
|
54
|
+
)
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
if precompute_edges:
|
|
58
|
+
edge_pairs: list[tuple[E, E, float, float]] = []
|
|
59
|
+
|
|
60
|
+
for (x, y), node_pair_sims in zip(
|
|
61
|
+
batches, batch_node_pair_sims, strict=True
|
|
62
|
+
):
|
|
63
|
+
edge_pairs.extend(
|
|
64
|
+
(
|
|
65
|
+
x_edge.value,
|
|
66
|
+
y_edge.value,
|
|
67
|
+
node_pair_sims[(y_edge.source.key, x_edge.source.key)],
|
|
68
|
+
node_pair_sims[(y_edge.target.key, x_edge.target.key)],
|
|
69
|
+
)
|
|
70
|
+
for x_edge, y_edge in itertools.product(
|
|
71
|
+
x.edges.values(), y.edges.values()
|
|
72
|
+
)
|
|
73
|
+
if self.edge_matcher(x_edge.value, y_edge.value)
|
|
74
|
+
and (y_edge.source.key, x_edge.source.key) in node_pair_sims
|
|
75
|
+
and (y_edge.target.key, x_edge.target.key) in node_pair_sims
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
self.edge_sim_func(edge_pairs)
|
|
79
|
+
|
|
80
|
+
return [1.0] * len(batches)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from frozendict import frozendict
|
|
6
|
+
from scipy.optimize import quadratic_assignment
|
|
7
|
+
|
|
8
|
+
from ...helpers import get_logger
|
|
9
|
+
from ...model.graph import Graph
|
|
10
|
+
from ...typing import SimFunc
|
|
11
|
+
from .common import BaseGraphSimFunc, GraphSim
|
|
12
|
+
|
|
13
|
+
logger = get_logger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# https://jack.valmadre.net/notes/2020/12/08/non-perfect-linear-assignment/
|
|
17
|
+
@dataclass(slots=True)
|
|
18
|
+
class qap[K, N, E, G](
|
|
19
|
+
BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
|
|
20
|
+
):
|
|
21
|
+
"""Quadratic Assignment Problem (QAP) solver for graph similarity"""
|
|
22
|
+
|
|
23
|
+
node_del_cost: float = 1.0
|
|
24
|
+
node_ins_cost: float = 1.0
|
|
25
|
+
edge_del_cost: float = 1.0
|
|
26
|
+
edge_ins_cost: float = 1.0
|
|
27
|
+
illegal_cost: float = 1e9
|
|
28
|
+
|
|
29
|
+
def __call__(
|
|
30
|
+
self,
|
|
31
|
+
x: Graph[K, N, E, G],
|
|
32
|
+
y: Graph[K, N, E, G],
|
|
33
|
+
) -> GraphSim[K]:
|
|
34
|
+
node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
|
|
35
|
+
|
|
36
|
+
n = len(y.nodes)
|
|
37
|
+
m = len(x.nodes)
|
|
38
|
+
dim = n + m
|
|
39
|
+
a = np.zeros((dim, dim), dtype=float)
|
|
40
|
+
b = np.full((dim, dim), self.illegal_cost, dtype=float)
|
|
41
|
+
|
|
42
|
+
y2idx = {k: i for i, k in enumerate(y.nodes)}
|
|
43
|
+
x2idx = {k: i for i, k in enumerate(x.nodes)}
|
|
44
|
+
idx2y = {i: k for k, i in y2idx.items()}
|
|
45
|
+
idx2x = {i: k for k, i in x2idx.items()}
|
|
46
|
+
|
|
47
|
+
# (fast look‑ups for present / absent edges)
|
|
48
|
+
y_edges = {(y2idx[e.source.key], y2idx[e.target.key]) for e in y.edges.values()}
|
|
49
|
+
x_edges = {(x2idx[e.source.key], x2idx[e.target.key]) for e in x.edges.values()}
|
|
50
|
+
|
|
51
|
+
# linear part
|
|
52
|
+
# substitution: real-real
|
|
53
|
+
for (y_key, i), (x_key, j) in itertools.product(y2idx.items(), x2idx.items()):
|
|
54
|
+
b[i, j] = (
|
|
55
|
+
1.0 - node_pair_sims[(y_key, x_key)]
|
|
56
|
+
if (y_key, x_key) in node_pair_sims
|
|
57
|
+
else self.illegal_cost
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# deletion: real-dummy
|
|
61
|
+
for y_key, i in y2idx.items():
|
|
62
|
+
dummy_col = m + i
|
|
63
|
+
b[i, dummy_col] = self.node_del_cost
|
|
64
|
+
a[dummy_col, dummy_col] = 1.0 # selector
|
|
65
|
+
|
|
66
|
+
# insertion: dummy-real
|
|
67
|
+
for x_key, j in x2idx.items():
|
|
68
|
+
dummy_row = n + j
|
|
69
|
+
b[dummy_row, j] = self.node_ins_cost
|
|
70
|
+
a[dummy_row, dummy_row] = 1.0 # selector
|
|
71
|
+
|
|
72
|
+
# quadratic part
|
|
73
|
+
# real edges of y in A
|
|
74
|
+
for e in y.edges.values():
|
|
75
|
+
i, j = y2idx[e.source.key], y2idx[e.target.key]
|
|
76
|
+
a[i, j] = 1.0
|
|
77
|
+
|
|
78
|
+
# real edges of x in B
|
|
79
|
+
# not needed for directed graphs
|
|
80
|
+
# for e in x.edges.values():
|
|
81
|
+
# i, j = x2idx[e.source.key], x2idx[e.target.key]
|
|
82
|
+
# b[i, j] = 1.0
|
|
83
|
+
|
|
84
|
+
# edge substitution
|
|
85
|
+
for y_edge, x_edge in itertools.product(y.edges.values(), x.edges.values()):
|
|
86
|
+
iy, jy = y2idx[y_edge.source.key], y2idx[y_edge.target.key]
|
|
87
|
+
ix, jx = x2idx[x_edge.source.key], x2idx[x_edge.target.key]
|
|
88
|
+
cost = (
|
|
89
|
+
1.0 - edge_pair_sims[(y_edge.key, x_edge.key)]
|
|
90
|
+
if (y_edge.key, x_edge.key) in edge_pair_sims
|
|
91
|
+
else self.illegal_cost
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# four row/col combinations induced by the permutation
|
|
95
|
+
for r, c in ((iy, ix), (iy, jx), (jy, ix), (jy, jx)):
|
|
96
|
+
# keep the *lowest* cost if collisions happen
|
|
97
|
+
b[r, c] = min(b[r, c], cost)
|
|
98
|
+
|
|
99
|
+
# edge deletion/insertion
|
|
100
|
+
for iy, jy in itertools.product(range(n), range(n)):
|
|
101
|
+
y_has = (iy, jy) in y_edges
|
|
102
|
+
|
|
103
|
+
for ix, jx in itertools.product(range(m), range(m)):
|
|
104
|
+
x_has = (ix, jx) in x_edges
|
|
105
|
+
|
|
106
|
+
if y_has and not x_has: # deletion
|
|
107
|
+
cost = self.edge_del_cost
|
|
108
|
+
elif not y_has and x_has: # insertion
|
|
109
|
+
cost = self.edge_ins_cost
|
|
110
|
+
else: # no op
|
|
111
|
+
continue
|
|
112
|
+
|
|
113
|
+
for r, c in ((iy, ix), (iy, jx), (jy, ix), (jy, jx)):
|
|
114
|
+
b[r, c] = min(b[r, c], cost)
|
|
115
|
+
|
|
116
|
+
try:
|
|
117
|
+
res = quadratic_assignment(a, b, method="faq")
|
|
118
|
+
except ValueError as e:
|
|
119
|
+
logger.warning(f"Failed to compute QAP mapping for two graphs: {e}")
|
|
120
|
+
|
|
121
|
+
return GraphSim(
|
|
122
|
+
0.0,
|
|
123
|
+
frozendict(),
|
|
124
|
+
frozendict(),
|
|
125
|
+
frozendict(),
|
|
126
|
+
frozendict(),
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
# only consider substitutions of real nodes
|
|
130
|
+
node_mapping = frozendict(
|
|
131
|
+
(idx2y[row], idx2x[col])
|
|
132
|
+
for row, col in enumerate(res.col_ind)
|
|
133
|
+
if row < n and col < m and (idx2y[row], idx2x[col]) in node_pair_sims
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
|
|
137
|
+
|
|
138
|
+
return self.similarity(
|
|
139
|
+
x,
|
|
140
|
+
y,
|
|
141
|
+
node_mapping,
|
|
142
|
+
edge_mapping,
|
|
143
|
+
node_pair_sims,
|
|
144
|
+
edge_pair_sims,
|
|
145
|
+
)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import itertools
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from typing import override
|
|
4
5
|
|
|
@@ -11,27 +12,31 @@ from .common import BaseGraphSimFunc, GraphSim
|
|
|
11
12
|
|
|
12
13
|
with optional_dependencies():
|
|
13
14
|
import rustworkx
|
|
15
|
+
from networkx.algorithms.isomorphism import DiGraphMatcher
|
|
14
16
|
|
|
15
|
-
from ...model.graph import to_rustworkx_with_lookup
|
|
17
|
+
from ...model.graph import to_networkx, to_rustworkx_with_lookup
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
@dataclass(slots=True)
|
|
19
|
-
class vf2[K, N, E, G](
|
|
20
|
-
BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
|
|
21
|
+
class VF2Base[K, N, E, G](
|
|
22
|
+
ABC, BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
|
|
21
23
|
):
|
|
22
24
|
"""Compute subgraph isomorphisms between two graphs.
|
|
23
25
|
|
|
24
|
-
- Convert the input graphs to Rustworkx graphs.
|
|
25
26
|
- Compute all possible subgraph isomorphisms between the two graphs.
|
|
26
27
|
- For each isomorphism, compute the global similarity.
|
|
27
28
|
- Return the isomorphism mapping with the highest similarity.
|
|
28
29
|
"""
|
|
29
30
|
|
|
30
|
-
id_order: bool = False
|
|
31
|
-
subgraph: bool = True
|
|
32
|
-
induced: bool = False
|
|
33
|
-
call_limit: int | None = None
|
|
34
31
|
max_iterations: int = 0
|
|
32
|
+
maximum_common_subgraph: bool = True
|
|
33
|
+
|
|
34
|
+
@abstractmethod
|
|
35
|
+
def node_mappings(
|
|
36
|
+
self,
|
|
37
|
+
x: Graph[K, N, E, G],
|
|
38
|
+
y: Graph[K, N, E, G],
|
|
39
|
+
) -> list[frozendict[K, K]]: ...
|
|
35
40
|
|
|
36
41
|
@override
|
|
37
42
|
def __call__(
|
|
@@ -39,6 +44,77 @@ class vf2[K, N, E, G](
|
|
|
39
44
|
x: Graph[K, N, E, G],
|
|
40
45
|
y: Graph[K, N, E, G],
|
|
41
46
|
) -> GraphSim[K]:
|
|
47
|
+
node_mappings: list[frozendict[K, K]] = []
|
|
48
|
+
next_permutations: list[Graph] = [y]
|
|
49
|
+
|
|
50
|
+
while next_permutations and not node_mappings:
|
|
51
|
+
current_permutations = next_permutations
|
|
52
|
+
next_permutations = []
|
|
53
|
+
|
|
54
|
+
for current_permutation in current_permutations:
|
|
55
|
+
node_mappings.extend(self.node_mappings(x, current_permutation))
|
|
56
|
+
|
|
57
|
+
if self.maximum_common_subgraph:
|
|
58
|
+
# remove nodes from y to determine partial mappings
|
|
59
|
+
next_permutations.extend(
|
|
60
|
+
Graph(
|
|
61
|
+
nodes=frozendict(
|
|
62
|
+
(k, v)
|
|
63
|
+
for k, v in current_permutation.nodes.items()
|
|
64
|
+
if k != node_key
|
|
65
|
+
),
|
|
66
|
+
edges=frozendict(
|
|
67
|
+
(k, v)
|
|
68
|
+
for k, v in current_permutation.edges.items()
|
|
69
|
+
if v.source.key != node_key and v.target.key != node_key
|
|
70
|
+
),
|
|
71
|
+
value=current_permutation.value,
|
|
72
|
+
)
|
|
73
|
+
for node_key in current_permutation.nodes.keys()
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
graph_sims: list[GraphSim[K]] = []
|
|
77
|
+
|
|
78
|
+
for node_mapping in node_mappings:
|
|
79
|
+
edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
|
|
80
|
+
node_pair_sims, edge_pair_sims = self.pair_similarities(
|
|
81
|
+
x, y, list(node_mapping.items()), list(edge_mapping.items())
|
|
82
|
+
)
|
|
83
|
+
graph_sims.append(
|
|
84
|
+
self.similarity(
|
|
85
|
+
x,
|
|
86
|
+
y,
|
|
87
|
+
node_mapping,
|
|
88
|
+
edge_mapping,
|
|
89
|
+
node_pair_sims,
|
|
90
|
+
edge_pair_sims,
|
|
91
|
+
)
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
return max(
|
|
95
|
+
graph_sims,
|
|
96
|
+
key=lambda sim: sim.value,
|
|
97
|
+
default=GraphSim(
|
|
98
|
+
0.0,
|
|
99
|
+
frozendict(),
|
|
100
|
+
frozendict(),
|
|
101
|
+
frozendict(),
|
|
102
|
+
frozendict(),
|
|
103
|
+
),
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@dataclass(slots=True)
|
|
108
|
+
class vf2_rustworkx[K, N, E, G](VF2Base):
|
|
109
|
+
id_order: bool = False
|
|
110
|
+
induced: bool = False
|
|
111
|
+
call_limit: int | None = None
|
|
112
|
+
|
|
113
|
+
def node_mappings(
|
|
114
|
+
self,
|
|
115
|
+
x: Graph[K, N, E, G],
|
|
116
|
+
y: Graph[K, N, E, G],
|
|
117
|
+
) -> list[frozendict[K, K]]:
|
|
42
118
|
if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
|
|
43
119
|
larger_graph, larger_graph_lookup = to_rustworkx_with_lookup(y)
|
|
44
120
|
smaller_graph, smaller_graph_lookup = to_rustworkx_with_lookup(x)
|
|
@@ -59,14 +135,13 @@ class vf2[K, N, E, G](
|
|
|
59
135
|
smaller_graph,
|
|
60
136
|
node_matcher=node_matcher,
|
|
61
137
|
edge_matcher=edge_matcher,
|
|
138
|
+
subgraph=True,
|
|
62
139
|
id_order=self.id_order,
|
|
63
|
-
subgraph=self.subgraph,
|
|
64
140
|
induced=self.induced,
|
|
65
141
|
call_limit=self.call_limit,
|
|
66
142
|
)
|
|
67
143
|
|
|
68
144
|
node_mappings: list[frozendict[K, K]] = []
|
|
69
|
-
graph_sims: list[GraphSim[K]] = []
|
|
70
145
|
|
|
71
146
|
for idx in itertools.count():
|
|
72
147
|
if self.max_iterations > 0 and idx >= self.max_iterations:
|
|
@@ -98,30 +173,63 @@ class vf2[K, N, E, G](
|
|
|
98
173
|
except StopIteration:
|
|
99
174
|
break
|
|
100
175
|
|
|
101
|
-
|
|
102
|
-
edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
|
|
103
|
-
node_pair_sims, edge_pair_sims = self.pair_similarities(
|
|
104
|
-
x, y, list(node_mapping.items()), list(edge_mapping.items())
|
|
105
|
-
)
|
|
106
|
-
graph_sims.append(
|
|
107
|
-
self.similarity(
|
|
108
|
-
x,
|
|
109
|
-
y,
|
|
110
|
-
node_mapping,
|
|
111
|
-
edge_mapping,
|
|
112
|
-
node_pair_sims,
|
|
113
|
-
edge_pair_sims,
|
|
114
|
-
)
|
|
115
|
-
)
|
|
176
|
+
return node_mappings
|
|
116
177
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
)
|
|
178
|
+
|
|
179
|
+
@dataclass(slots=True)
|
|
180
|
+
class vf2_networkx[K, N, E, G](VF2Base):
|
|
181
|
+
def node_mappings(
|
|
182
|
+
self,
|
|
183
|
+
x: Graph[K, N, E, G],
|
|
184
|
+
y: Graph[K, N, E, G],
|
|
185
|
+
) -> list[frozendict[K, K]]:
|
|
186
|
+
if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
|
|
187
|
+
larger_graph = to_networkx(y)
|
|
188
|
+
smaller_graph = to_networkx(x)
|
|
189
|
+
node_matcher = reverse_positional(self.node_matcher)
|
|
190
|
+
edge_matcher = reverse_positional(self.edge_matcher)
|
|
191
|
+
else:
|
|
192
|
+
larger_graph = to_networkx(x)
|
|
193
|
+
smaller_graph = to_networkx(y)
|
|
194
|
+
node_matcher = self.node_matcher
|
|
195
|
+
edge_matcher = self.edge_matcher
|
|
196
|
+
|
|
197
|
+
# `first` must be the larger graph and `second` the smaller one.
|
|
198
|
+
graph_matcher = DiGraphMatcher(
|
|
199
|
+
larger_graph,
|
|
200
|
+
smaller_graph,
|
|
201
|
+
node_match=lambda x, y: node_matcher(x["value"], y["value"]),
|
|
202
|
+
edge_match=lambda x, y: edge_matcher(x["value"], y["value"]),
|
|
127
203
|
)
|
|
204
|
+
|
|
205
|
+
mappings_iter = graph_matcher.subgraph_isomorphisms_iter()
|
|
206
|
+
node_mappings: list[frozendict[K, K]] = []
|
|
207
|
+
|
|
208
|
+
for idx in itertools.count():
|
|
209
|
+
if self.max_iterations > 0 and idx >= self.max_iterations:
|
|
210
|
+
break
|
|
211
|
+
|
|
212
|
+
try:
|
|
213
|
+
if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
|
|
214
|
+
# y -> x (as needed)
|
|
215
|
+
node_mappings.append(
|
|
216
|
+
frozendict(
|
|
217
|
+
(larger_idx, smaller_idx)
|
|
218
|
+
for larger_idx, smaller_idx in next(mappings_iter).items()
|
|
219
|
+
)
|
|
220
|
+
)
|
|
221
|
+
else:
|
|
222
|
+
# x -> y (needs to be inverted)
|
|
223
|
+
node_mappings.append(
|
|
224
|
+
frozendict(
|
|
225
|
+
(smaller_idx, larger_idx)
|
|
226
|
+
for larger_idx, smaller_idx in next(mappings_iter).items()
|
|
227
|
+
)
|
|
228
|
+
)
|
|
229
|
+
except StopIteration:
|
|
230
|
+
break
|
|
231
|
+
|
|
232
|
+
return node_mappings
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# `vf2` is an alias for the rustworkx-backed implementation.
vf2 = vf2_rustworkx
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
import itertools
|
|
2
|
-
from collections.abc import Sequence
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
from ...model.graph import (
|
|
6
|
-
Edge,
|
|
7
|
-
Graph,
|
|
8
|
-
Node,
|
|
9
|
-
)
|
|
10
|
-
from ...typing import BatchSimFunc
|
|
11
|
-
from .common import BaseGraphSimFunc, SemanticEdgeSim
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@dataclass(slots=True)
|
|
15
|
-
class precompute[K, N, E, G](
|
|
16
|
-
BaseGraphSimFunc[K, N, E, G], BatchSimFunc[Graph[K, N, E, G], float]
|
|
17
|
-
):
|
|
18
|
-
precompute_nodes: bool = True
|
|
19
|
-
precompute_edges: bool = True
|
|
20
|
-
|
|
21
|
-
def __call__(
|
|
22
|
-
self, batches: Sequence[tuple[Graph[K, N, E, G], Graph[K, N, E, G]]]
|
|
23
|
-
) -> list[float]:
|
|
24
|
-
if self.precompute_nodes:
|
|
25
|
-
node_pairs: list[tuple[Node[K, N], Node[K, N]]] = []
|
|
26
|
-
|
|
27
|
-
for x, y in batches:
|
|
28
|
-
node_pairs.extend(
|
|
29
|
-
(x_node, y_node)
|
|
30
|
-
for x_node, y_node in itertools.product(
|
|
31
|
-
x.nodes.values(), y.nodes.values()
|
|
32
|
-
)
|
|
33
|
-
if self.node_matcher(x_node.value, y_node.value)
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
self.batch_node_sim_func(node_pairs)
|
|
37
|
-
|
|
38
|
-
if self.precompute_edges and not isinstance(
|
|
39
|
-
self.batch_edge_sim_func, SemanticEdgeSim
|
|
40
|
-
):
|
|
41
|
-
edge_pairs: list[tuple[Edge[K, N, E], Edge[K, N, E]]] = []
|
|
42
|
-
|
|
43
|
-
for x, y in batches:
|
|
44
|
-
edge_pairs.extend(
|
|
45
|
-
(x_edge, y_edge)
|
|
46
|
-
for x_edge, y_edge in itertools.product(
|
|
47
|
-
x.edges.values(), y.edges.values()
|
|
48
|
-
)
|
|
49
|
-
if self.edge_matcher(x_edge.value, y_edge.value)
|
|
50
|
-
and self.node_matcher(x_edge.source.value, y_edge.source.value)
|
|
51
|
-
and self.node_matcher(x_edge.target.value, y_edge.target.value)
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
self.batch_edge_sim_func(edge_pairs)
|
|
55
|
-
|
|
56
|
-
return [1.0] * len(batches)
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
import itertools
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
|
|
4
|
-
import numpy as np
|
|
5
|
-
from frozendict import frozendict
|
|
6
|
-
from scipy.optimize import quadratic_assignment
|
|
7
|
-
|
|
8
|
-
from ...helpers import get_logger
|
|
9
|
-
from ...model.graph import (
|
|
10
|
-
Graph,
|
|
11
|
-
)
|
|
12
|
-
from ...typing import SimFunc
|
|
13
|
-
from .common import BaseGraphSimFunc, GraphSim
|
|
14
|
-
|
|
15
|
-
logger = get_logger(__name__)
|
|
16
|
-
|
|
17
|
-
__all__ = ["qap"]
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
# https://jack.valmadre.net/notes/2020/12/08/non-perfect-linear-assignment/
|
|
21
|
-
@dataclass(slots=True)
|
|
22
|
-
class qap[K, N, E, G](
|
|
23
|
-
BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
|
|
24
|
-
):
|
|
25
|
-
"""Quadratic Assignment Problem (QAP) solver for graph similarity
|
|
26
|
-
|
|
27
|
-
Currently not functional, the generated mappings are not correct.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
def __call__(
|
|
31
|
-
self,
|
|
32
|
-
x: Graph[K, N, E, G],
|
|
33
|
-
y: Graph[K, N, E, G],
|
|
34
|
-
) -> GraphSim[K]:
|
|
35
|
-
node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
|
|
36
|
-
|
|
37
|
-
n = len(y.nodes)
|
|
38
|
-
m = len(x.nodes)
|
|
39
|
-
dim = n + m
|
|
40
|
-
a = np.zeros((dim, dim), dtype=float)
|
|
41
|
-
b = np.zeros((dim, dim), dtype=float)
|
|
42
|
-
|
|
43
|
-
y2idx = {k: i for i, k in enumerate(y.nodes)}
|
|
44
|
-
x2idx = {k: i for i, k in enumerate(x.nodes)}
|
|
45
|
-
idx2y = {i: k for k, i in y2idx.items()}
|
|
46
|
-
idx2x = {i: k for k, i in x2idx.items()}
|
|
47
|
-
|
|
48
|
-
# put 1 on every real-node loop of a
|
|
49
|
-
# encode substitution / deletion cost on the corresponding loop of b
|
|
50
|
-
for i in idx2y.keys():
|
|
51
|
-
a[i, i] = 1.0 # selector
|
|
52
|
-
b[m + i, m + i] = 1.0 # deletion
|
|
53
|
-
|
|
54
|
-
for j in idx2x.keys():
|
|
55
|
-
b[j, j] = 1.0 # selector
|
|
56
|
-
a[n + j, n + j] = 1.0 # insertion
|
|
57
|
-
|
|
58
|
-
# substitution cost (real-real loops)
|
|
59
|
-
for (y_key, i), (x_key, j) in itertools.product(y2idx.items(), x2idx.items()):
|
|
60
|
-
b[i, j] = (
|
|
61
|
-
1 - node_pair_sims[(y_key, x_key)]
|
|
62
|
-
if (y_key, x_key) in node_pair_sims
|
|
63
|
-
else 1e9
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
# real edge in y, deletion cost when mapped to two dummies
|
|
67
|
-
for e in y.edges.values():
|
|
68
|
-
i, j = y2idx[e.source.key], y2idx[e.target.key]
|
|
69
|
-
b[m + i, m + j] = 1
|
|
70
|
-
b[m + j, m + i] = 1
|
|
71
|
-
|
|
72
|
-
# real edge in x, insertion cost when mapped from two dummies
|
|
73
|
-
for e in x.edges.values():
|
|
74
|
-
i, j = x2idx[e.source.key], x2idx[e.target.key]
|
|
75
|
-
a[n + i, n + j] = 1
|
|
76
|
-
a[n + j, n + i] = 1
|
|
77
|
-
|
|
78
|
-
# real-real pairs, substitution cost
|
|
79
|
-
for y_edge, x_edge in itertools.product(y.edges.values(), x.edges.values()):
|
|
80
|
-
i, j = x2idx[x_edge.source.key], x2idx[x_edge.target.key]
|
|
81
|
-
b[i, j] = (
|
|
82
|
-
1 - edge_pair_sims[(y_edge.key, x_edge.key)]
|
|
83
|
-
if (y_edge.key, x_edge.key) in edge_pair_sims
|
|
84
|
-
else 1e9
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
try:
|
|
88
|
-
res = quadratic_assignment(a, b, method="faq")
|
|
89
|
-
except ValueError as e:
|
|
90
|
-
logger.warning(f"Failed to compute QAP mapping for two graphs: {e}")
|
|
91
|
-
|
|
92
|
-
return GraphSim(
|
|
93
|
-
0.0,
|
|
94
|
-
frozendict(),
|
|
95
|
-
frozendict(),
|
|
96
|
-
frozendict(),
|
|
97
|
-
frozendict(),
|
|
98
|
-
)
|
|
99
|
-
|
|
100
|
-
# only consider substitutions of real nodes
|
|
101
|
-
node_mapping = frozendict(
|
|
102
|
-
(idx2y[y_idx], idx2x[x_idx])
|
|
103
|
-
for y_idx, x_idx in enumerate(res.col_ind)
|
|
104
|
-
if y_idx < n
|
|
105
|
-
and x_idx < m
|
|
106
|
-
and (idx2y[y_idx], idx2x[x_idx]) in node_pair_sims
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
|
|
110
|
-
|
|
111
|
-
return self.similarity(
|
|
112
|
-
x,
|
|
113
|
-
y,
|
|
114
|
-
node_mapping,
|
|
115
|
-
edge_mapping,
|
|
116
|
-
node_pair_sims,
|
|
117
|
-
edge_pair_sims,
|
|
118
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|