cbrkit 0.26.1__tar.gz → 0.26.2__tar.gz

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (69)
  1. {cbrkit-0.26.1 → cbrkit-0.26.2}/PKG-INFO +1 -1
  2. {cbrkit-0.26.1 → cbrkit-0.26.2}/pyproject.toml +1 -1
  3. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/__init__.py +3 -1
  4. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/astar.py +0 -4
  5. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/common.py +40 -51
  6. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/dfs.py +4 -6
  7. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/greedy.py +0 -6
  8. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/lap.py +67 -14
  9. cbrkit-0.26.2/src/cbrkit/sim/graphs/precompute.py +80 -0
  10. cbrkit-0.26.2/src/cbrkit/sim/graphs/qap.py +145 -0
  11. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/vf2.py +143 -35
  12. cbrkit-0.26.1/src/cbrkit/sim/graphs/precompute.py +0 -56
  13. cbrkit-0.26.1/src/cbrkit/sim/graphs/qap.py +0 -118
  14. {cbrkit-0.26.1 → cbrkit-0.26.2}/README.md +0 -0
  15. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/__init__.py +0 -0
  16. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/__main__.py +0 -0
  17. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/__init__.py +0 -0
  18. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/attribute_value.py +0 -0
  19. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/generic.py +0 -0
  20. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/numbers.py +0 -0
  21. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/adapt/strings.py +0 -0
  22. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/api.py +0 -0
  23. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/cli.py +0 -0
  24. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/constants.py +0 -0
  25. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/cycle.py +0 -0
  26. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/dumpers.py +0 -0
  27. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/eval/__init__.py +0 -0
  28. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/eval/common.py +0 -0
  29. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/eval/retrieval.py +0 -0
  30. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/helpers.py +0 -0
  31. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/loaders.py +0 -0
  32. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/model/__init__.py +0 -0
  33. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/model/graph.py +0 -0
  34. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/model/result.py +0 -0
  35. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/py.typed +0 -0
  36. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/__init__.py +0 -0
  37. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/apply.py +0 -0
  38. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/build.py +0 -0
  39. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/retrieval/rerank.py +0 -0
  40. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/reuse/__init__.py +0 -0
  41. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/reuse/apply.py +0 -0
  42. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/reuse/build.py +0 -0
  43. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/__init__.py +0 -0
  44. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/aggregator.py +0 -0
  45. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/attribute_value.py +0 -0
  46. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/collections.py +0 -0
  47. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/embed.py +0 -0
  48. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/generic.py +0 -0
  49. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/alignment.py +0 -0
  50. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/graphs/brute_force.py +0 -0
  51. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/numbers.py +0 -0
  52. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/strings.py +0 -0
  53. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/taxonomy.py +0 -0
  54. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/sim/wrappers.py +0 -0
  55. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/__init__.py +0 -0
  56. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/apply.py +0 -0
  57. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/build.py +0 -0
  58. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/model.py +0 -0
  59. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/prompts.py +0 -0
  60. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/__init__.py +0 -0
  61. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/anthropic.py +0 -0
  62. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/cohere.py +0 -0
  63. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/google.py +0 -0
  64. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/instructor.py +0 -0
  65. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/model.py +0 -0
  66. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/ollama.py +0 -0
  67. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/openai.py +0 -0
  68. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
  69. {cbrkit-0.26.1 → cbrkit-0.26.2}/src/cbrkit/typing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cbrkit
3
- Version: 0.26.1
3
+ Version: 0.26.2
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
5
5
  Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
6
6
  Author: Mirko Lenz
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cbrkit"
3
- version = "0.26.1"
3
+ version = "0.26.2"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
5
5
  authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
6
6
  readme = "README.md"
@@ -15,7 +15,7 @@ from .common import (
15
15
  from .greedy import greedy
16
16
  from .lap import lap
17
17
  from .precompute import precompute
18
- from .vf2 import vf2
18
+ from .vf2 import vf2, vf2_networkx, vf2_rustworkx
19
19
 
20
20
  with optional_dependencies():
21
21
  from .alignment import dtw
@@ -34,6 +34,8 @@ __all__ = [
34
34
  "lap",
35
35
  "precompute",
36
36
  "vf2",
37
+ "vf2_networkx",
38
+ "vf2_rustworkx",
37
39
  "dtw",
38
40
  "smith_waterman",
39
41
  "init_empty",
@@ -363,10 +363,6 @@ class build[K, N, E, G](
363
363
  x: Graph[K, N, E, G],
364
364
  y: Graph[K, N, E, G],
365
365
  ) -> GraphSim[K]:
366
- # if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
367
- # self_inv = dataclasses.replace(self, _invert=True)
368
- # return self.invert_similarity(x, y, self_inv(x=y, y=x))
369
-
370
366
  node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
371
367
 
372
368
  open_set: list[PriorityState[K]] = []
@@ -8,12 +8,11 @@ from frozendict import frozendict
8
8
 
9
9
  from ...helpers import (
10
10
  batchify_sim,
11
- reverse_batch_positional,
12
- reverse_positional,
13
11
  total_params,
14
12
  unpack_float,
13
+ unpack_floats,
15
14
  )
16
- from ...model.graph import Edge, Graph, Node
15
+ from ...model.graph import Graph, Node
17
16
  from ...typing import AnySimFunc, BatchSimFunc, Float, SimFunc, StructuredValue
18
17
  from ..wrappers import transpose_value
19
18
 
@@ -38,26 +37,38 @@ def default_element_matcher(x: Any, y: Any) -> bool:
38
37
 
39
38
  @dataclass(slots=True, frozen=True)
40
39
  class SemanticEdgeSim[K, N, E]:
41
- source_weight: float = 0.5
42
- target_weight: float = 0.5
40
+ source_weight: float = 1.0
41
+ target_weight: float = 1.0
42
+ edge_sim_func: AnySimFunc[E, Float] | None = None
43
43
 
44
44
  def __call__(
45
45
  self,
46
- batches: Sequence[tuple[Edge[K, N, E], Edge[K, N, E], PairSim[K]]],
46
+ batches: Sequence[tuple[E, E, float, float]],
47
47
  ) -> list[float]:
48
- source_sims = (
49
- node_pair_sims.get((y.source.key, x.source.key), 0.0)
50
- for x, y, node_pair_sims in batches
51
- )
52
- target_sims = (
53
- node_pair_sims.get((y.target.key, x.target.key), 0.0)
54
- for x, y, node_pair_sims in batches
55
- )
48
+ source_sims = (source_sim for _, _, source_sim, _ in batches)
49
+ target_sims = (target_sim for _, _, _, target_sim in batches)
50
+
51
+ if self.edge_sim_func is not None:
52
+ edge_sim_func = batchify_sim(self.edge_sim_func)
53
+ edge_sims = unpack_floats(
54
+ edge_sim_func(
55
+ [(x, y) for x, y, _, _ in batches],
56
+ )
57
+ )
58
+ else:
59
+ edge_sims = [1.0] * len(batches)
60
+
61
+ scaling_factor = self.source_weight + self.target_weight
62
+
63
+ if scaling_factor == 0:
64
+ return edge_sims
56
65
 
57
66
  return [
58
- (self.source_weight * source + self.target_weight * target)
59
- / (self.source_weight + self.target_weight)
60
- for source, target in zip(source_sims, target_sims, strict=True)
67
+ (edge * source * self.source_weight / scaling_factor)
68
+ + (edge * target * self.target_weight / scaling_factor)
69
+ for source, target, edge in zip(
70
+ source_sims, target_sims, edge_sims, strict=True
71
+ )
61
72
  ]
62
73
 
63
74
 
@@ -82,37 +93,14 @@ def _induced_edge_mapping[K, N, E, G](
82
93
  @dataclass(slots=True)
83
94
  class BaseGraphSimFunc[K, N, E, G]:
84
95
  node_sim_func: AnySimFunc[N, Float]
85
- edge_sim_func: AnySimFunc[Edge[K, N, E], Float] | SemanticEdgeSim[K, N, E] = (
86
- default_edge_sim
87
- )
96
+ edge_sim_func: SemanticEdgeSim[K, N, E] = default_edge_sim
88
97
  node_matcher: ElementMatcher[N] = default_element_matcher
89
98
  edge_matcher: ElementMatcher[E] = default_element_matcher
90
99
  batch_node_sim_func: BatchSimFunc[Node[K, N], Float] = field(init=False)
91
- batch_edge_sim_func: (
92
- BatchSimFunc[Edge[K, N, E], Float] | SemanticEdgeSim[K, N, E]
93
- ) = field(init=False)
94
- _invert: bool = False
95
100
 
96
101
  def __post_init__(self) -> None:
97
102
  self.batch_node_sim_func = batchify_sim(transpose_value(self.node_sim_func))
98
103
 
99
- if isinstance(self.edge_sim_func, SemanticEdgeSim):
100
- self.batch_edge_sim_func = self.edge_sim_func
101
- else:
102
- self.batch_edge_sim_func = batchify_sim(self.edge_sim_func)
103
-
104
- if self._invert:
105
- self.node_matcher = reverse_positional(self.node_matcher)
106
- self.edge_matcher = reverse_positional(self.edge_matcher)
107
- self.batch_node_sim_func = reverse_batch_positional(
108
- self.batch_node_sim_func
109
- )
110
- if not isinstance(self.batch_edge_sim_func, SemanticEdgeSim):
111
- # semantic edge sim is agnostic to order
112
- self.batch_edge_sim_func = reverse_batch_positional(
113
- self.batch_edge_sim_func
114
- )
115
-
116
104
  def induced_edge_mapping(
117
105
  self,
118
106
  x: Graph[K, N, E, G],
@@ -163,16 +151,17 @@ class BaseGraphSimFunc[K, N, E, G]:
163
151
  ]
164
152
 
165
153
  edge_pair_values = [(x.edges[x_key], y.edges[y_key]) for y_key, x_key in pairs]
166
-
167
- if isinstance(self.batch_edge_sim_func, SemanticEdgeSim):
168
- edge_pair_sims = self.batch_edge_sim_func(
169
- [
170
- (x_edge, y_edge, node_pair_sims)
171
- for x_edge, y_edge in edge_pair_values
172
- ]
173
- )
174
- else:
175
- edge_pair_sims = self.batch_edge_sim_func(edge_pair_values)
154
+ edge_pair_sims = self.edge_sim_func(
155
+ [
156
+ (
157
+ x_edge.value,
158
+ y_edge.value,
159
+ node_pair_sims[(y_edge.source.key, x_edge.source.key)],
160
+ node_pair_sims[(y_edge.target.key, x_edge.target.key)],
161
+ )
162
+ for x_edge, y_edge in edge_pair_values
163
+ ]
164
+ )
176
165
 
177
166
  return {
178
167
  (y_edge.key, x_edge.key): unpack_float(sim)
@@ -11,8 +11,6 @@ from .common import BaseGraphSimFunc, GraphSim
11
11
 
12
12
  logger = get_logger(__name__)
13
13
 
14
- __all__ = ["dfs"]
15
-
16
14
 
17
15
  class RootsFunc[K, N, E, G](Protocol):
18
16
  """Support for matching rooted graphs
@@ -37,10 +35,10 @@ with optional_dependencies():
37
35
  class dfs[K, N, E, G](
38
36
  BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
39
37
  ):
40
- node_del_cost: float = 2.0
41
- node_ins_cost: float = 0.0
42
- edge_del_cost: float = 2.0
43
- edge_ins_cost: float = 0.0
38
+ node_del_cost: float = 1.0
39
+ node_ins_cost: float = 1.0
40
+ edge_del_cost: float = 1.0
41
+ edge_ins_cost: float = 1.0
44
42
  max_iterations: int = 0
45
43
  upper_bound: float | None = None
46
44
  strictly_decreasing: bool = True
@@ -11,8 +11,6 @@ from .common import GraphSim, SearchGraphSimFunc, SearchState
11
11
 
12
12
  logger = get_logger(__name__)
13
13
 
14
- __all__ = ["greedy"]
15
-
16
14
 
17
15
  @dataclass(slots=True)
18
16
  class greedy[K, N, E, G](
@@ -35,10 +33,6 @@ class greedy[K, N, E, G](
35
33
  x: Graph[K, N, E, G],
36
34
  y: Graph[K, N, E, G],
37
35
  ) -> GraphSim[K]:
38
- # if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
39
- # self_inv = dataclasses.replace(self, _invert=True)
40
- # return self.invert_similarity(x, y, self_inv(x=y, y=x))
41
-
42
36
  node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
43
37
 
44
38
  current_state = self.init_search_state(x, y)
@@ -14,18 +14,17 @@ from .common import BaseGraphSimFunc, GraphSim, PairSim
14
14
 
15
15
  logger = get_logger(__name__)
16
16
 
17
- __all__ = ["lap"]
18
-
19
17
 
20
18
  # https://jack.valmadre.net/notes/2020/12/08/non-perfect-linear-assignment/
21
19
  @dataclass(slots=True)
22
20
  class lap[K, N, E, G](
23
21
  BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
24
22
  ):
25
- node_del_cost: float = 2.0
26
- node_ins_cost: float = 0.0
27
- edge_del_cost: float = 2.0
28
- edge_ins_cost: float = 0.0
23
+ greedy: bool = True
24
+ node_del_cost: float = 1.0
25
+ node_ins_cost: float = 1.0
26
+ edge_del_cost: float = 1.0
27
+ edge_ins_cost: float = 1.0
29
28
  # 1.0 gives an upper bound, 0.5 gives a lower bound
30
29
  # approximation is better with a lower bound
31
30
  # since we compute real edit costs at the end anyway,
@@ -38,7 +37,7 @@ class lap[K, N, E, G](
38
37
  e.key for e in g.edges.values() if n == e.source.key or n == e.target.key
39
38
  }
40
39
 
41
- def edge_sub_cost(
40
+ def edge_sub_cost_greedy(
42
41
  self,
43
42
  x: Graph[K, N, E, G],
44
43
  y: Graph[K, N, E, G],
@@ -46,9 +45,60 @@ class lap[K, N, E, G](
46
45
  y_node: K,
47
46
  edge_pair_sims: PairSim[K],
48
47
  ) -> float:
48
+ """BranchFast algorithm without solving an inner LAP problem.
49
+
50
+ - Substitutions are taken greedily in descending similarity order
51
+ - Unmatched y‑edges are deletions, unmatched x‑edges are insertions.
52
+ """
53
+
54
+ y_edges = list(self.connected_edges(y, y_node))
55
+ x_edges = list(self.connected_edges(x, x_node))
56
+
57
+ # trivial fast‑path
58
+ if not y_edges and not x_edges:
59
+ return 0.0
60
+
61
+ # candidate substitutions: (cost, y_key, x_key)
62
+ candidates: list[tuple[float, K, K]] = [
63
+ (1.0 - edge_pair_sims[(y_key, x_key)], y_key, x_key)
64
+ for y_key, x_key in itertools.product(y_edges, x_edges)
65
+ if (y_key, x_key) in edge_pair_sims
66
+ ]
67
+ # sort by cheapest cost ==> highest similarity first
68
+ candidates.sort(key=lambda t: t[0])
69
+
70
+ matched_y: set[K] = set()
71
+ matched_x: set[K] = set()
72
+ cost = 0.0
73
+
74
+ for c, y_key, x_key in candidates:
75
+ if y_key not in matched_y and x_key not in matched_x:
76
+ matched_y.add(y_key)
77
+ matched_x.add(x_key)
78
+ cost += c # substitution cost
79
+
80
+ # remaining deletions / insertions
81
+ cost += (len(y_edges) - len(matched_y)) * self.edge_del_cost
82
+ cost += (len(x_edges) - len(matched_x)) * self.edge_ins_cost
83
+
84
+ return cost
85
+
86
+ def edge_sub_cost_optimal(
87
+ self,
88
+ x: Graph[K, N, E, G],
89
+ y: Graph[K, N, E, G],
90
+ x_node: K,
91
+ y_node: K,
92
+ edge_pair_sims: PairSim[K],
93
+ ) -> float:
94
+ """Branch algorithm solving an inner LAP problem."""
95
+
49
96
  y_edges = self.connected_edges(y, y_node)
50
97
  x_edges = self.connected_edges(x, x_node)
51
98
 
99
+ if not y_edges and not x_edges:
100
+ return 0.0
101
+
52
102
  rows = len(y_edges)
53
103
  cols = len(x_edges)
54
104
  dim = rows + cols
@@ -126,13 +176,16 @@ class lap[K, N, E, G](
126
176
  and (sim := node_pair_sims.get((y_key, x_key)))
127
177
  ):
128
178
  node_sub_cost = 1.0 - sim
129
- edge_sub_cost = self.edge_sub_cost(
130
- x,
131
- y,
132
- x_key,
133
- y_key,
134
- edge_pair_sims,
135
- )
179
+
180
+ if self.greedy:
181
+ edge_sub_cost = self.edge_sub_cost_greedy(
182
+ x, y, x_key, y_key, edge_pair_sims
183
+ )
184
+ else:
185
+ edge_sub_cost = self.edge_sub_cost_optimal(
186
+ x, y, x_key, y_key, edge_pair_sims
187
+ )
188
+
136
189
  cost[r, c] = node_sub_cost + (self.edge_edit_factor * edge_sub_cost)
137
190
 
138
191
  if self.print_matrix:
@@ -0,0 +1,80 @@
1
+ import itertools
2
+ from collections.abc import Sequence
3
+ from dataclasses import dataclass
4
+
5
+ from cbrkit.helpers import chain_map_chunks, unpack_float
6
+
7
+ from ...model.graph import (
8
+ Graph,
9
+ Node,
10
+ )
11
+ from ...typing import BatchSimFunc
12
+ from .common import BaseGraphSimFunc
13
+
14
+
15
+ @dataclass(slots=True)
16
+ class precompute[K, N, E, G](
17
+ BaseGraphSimFunc[K, N, E, G], BatchSimFunc[Graph[K, N, E, G], float]
18
+ ):
19
+ precompute_nodes: bool = True
20
+ precompute_edges: bool = True
21
+
22
+ def __call__(
23
+ self, batches: Sequence[tuple[Graph[K, N, E, G], Graph[K, N, E, G]]]
24
+ ) -> list[float]:
25
+ precompute_edges = (
26
+ self.precompute_edges and self.edge_sim_func.edge_sim_func is not None
27
+ )
28
+ batch_node_pair_sims: list[dict[tuple[K, K], float]] = []
29
+
30
+ if self.precompute_nodes or precompute_edges:
31
+ batch_node_pairs: list[list[tuple[Node[K, N], Node[K, N]]]] = [
32
+ [
33
+ (x_node, y_node)
34
+ for x_node, y_node in itertools.product(
35
+ x.nodes.values(), y.nodes.values()
36
+ )
37
+ if self.node_matcher(x_node.value, y_node.value)
38
+ ]
39
+ for x, y in batches
40
+ ]
41
+
42
+ batch_node_pair_sims_list = chain_map_chunks(
43
+ batch_node_pairs, self.batch_node_sim_func
44
+ )
45
+ batch_node_pair_sims = [
46
+ {
47
+ (y_node.key, x_node.key): unpack_float(sim)
48
+ for (x_node, y_node), sim in zip(
49
+ node_pair_values, node_pair_sims, strict=True
50
+ )
51
+ }
52
+ for node_pair_values, node_pair_sims in zip(
53
+ batch_node_pairs, batch_node_pair_sims_list, strict=True
54
+ )
55
+ ]
56
+
57
+ if precompute_edges:
58
+ edge_pairs: list[tuple[E, E, float, float]] = []
59
+
60
+ for (x, y), node_pair_sims in zip(
61
+ batches, batch_node_pair_sims, strict=True
62
+ ):
63
+ edge_pairs.extend(
64
+ (
65
+ x_edge.value,
66
+ y_edge.value,
67
+ node_pair_sims[(y_edge.source.key, x_edge.source.key)],
68
+ node_pair_sims[(y_edge.target.key, x_edge.target.key)],
69
+ )
70
+ for x_edge, y_edge in itertools.product(
71
+ x.edges.values(), y.edges.values()
72
+ )
73
+ if self.edge_matcher(x_edge.value, y_edge.value)
74
+ and (y_edge.source.key, x_edge.source.key) in node_pair_sims
75
+ and (y_edge.target.key, x_edge.target.key) in node_pair_sims
76
+ )
77
+
78
+ self.edge_sim_func(edge_pairs)
79
+
80
+ return [1.0] * len(batches)
@@ -0,0 +1,145 @@
1
+ import itertools
2
+ from dataclasses import dataclass
3
+
4
+ import numpy as np
5
+ from frozendict import frozendict
6
+ from scipy.optimize import quadratic_assignment
7
+
8
+ from ...helpers import get_logger
9
+ from ...model.graph import Graph
10
+ from ...typing import SimFunc
11
+ from .common import BaseGraphSimFunc, GraphSim
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
+ # https://jack.valmadre.net/notes/2020/12/08/non-perfect-linear-assignment/
17
+ @dataclass(slots=True)
18
+ class qap[K, N, E, G](
19
+ BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
20
+ ):
21
+ """Quadratic Assignment Problem (QAP) solver for graph similarity"""
22
+
23
+ node_del_cost: float = 1.0
24
+ node_ins_cost: float = 1.0
25
+ edge_del_cost: float = 1.0
26
+ edge_ins_cost: float = 1.0
27
+ illegal_cost: float = 1e9
28
+
29
+ def __call__(
30
+ self,
31
+ x: Graph[K, N, E, G],
32
+ y: Graph[K, N, E, G],
33
+ ) -> GraphSim[K]:
34
+ node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
35
+
36
+ n = len(y.nodes)
37
+ m = len(x.nodes)
38
+ dim = n + m
39
+ a = np.zeros((dim, dim), dtype=float)
40
+ b = np.full((dim, dim), self.illegal_cost, dtype=float)
41
+
42
+ y2idx = {k: i for i, k in enumerate(y.nodes)}
43
+ x2idx = {k: i for i, k in enumerate(x.nodes)}
44
+ idx2y = {i: k for k, i in y2idx.items()}
45
+ idx2x = {i: k for k, i in x2idx.items()}
46
+
47
+ # (fast look‑ups for present / absent edges)
48
+ y_edges = {(y2idx[e.source.key], y2idx[e.target.key]) for e in y.edges.values()}
49
+ x_edges = {(x2idx[e.source.key], x2idx[e.target.key]) for e in x.edges.values()}
50
+
51
+ # linear part
52
+ # substitution: real-real
53
+ for (y_key, i), (x_key, j) in itertools.product(y2idx.items(), x2idx.items()):
54
+ b[i, j] = (
55
+ 1.0 - node_pair_sims[(y_key, x_key)]
56
+ if (y_key, x_key) in node_pair_sims
57
+ else self.illegal_cost
58
+ )
59
+
60
+ # deletion: real-dummy
61
+ for y_key, i in y2idx.items():
62
+ dummy_col = m + i
63
+ b[i, dummy_col] = self.node_del_cost
64
+ a[dummy_col, dummy_col] = 1.0 # selector
65
+
66
+ # insertion: dummy-real
67
+ for x_key, j in x2idx.items():
68
+ dummy_row = n + j
69
+ b[dummy_row, j] = self.node_ins_cost
70
+ a[dummy_row, dummy_row] = 1.0 # selector
71
+
72
+ # quadratic part
73
+ # real edges of y in A
74
+ for e in y.edges.values():
75
+ i, j = y2idx[e.source.key], y2idx[e.target.key]
76
+ a[i, j] = 1.0
77
+
78
+ # real edges of x in B
79
+ # not needed for directed graphs
80
+ # for e in x.edges.values():
81
+ # i, j = x2idx[e.source.key], x2idx[e.target.key]
82
+ # b[i, j] = 1.0
83
+
84
+ # edge substitution
85
+ for y_edge, x_edge in itertools.product(y.edges.values(), x.edges.values()):
86
+ iy, jy = y2idx[y_edge.source.key], y2idx[y_edge.target.key]
87
+ ix, jx = x2idx[x_edge.source.key], x2idx[x_edge.target.key]
88
+ cost = (
89
+ 1.0 - edge_pair_sims[(y_edge.key, x_edge.key)]
90
+ if (y_edge.key, x_edge.key) in edge_pair_sims
91
+ else self.illegal_cost
92
+ )
93
+
94
+ # four row/col combinations induced by the permutation
95
+ for r, c in ((iy, ix), (iy, jx), (jy, ix), (jy, jx)):
96
+ # keep the *lowest* cost if collisions happen
97
+ b[r, c] = min(b[r, c], cost)
98
+
99
+ # edge deletion/insertion
100
+ for iy, jy in itertools.product(range(n), range(n)):
101
+ y_has = (iy, jy) in y_edges
102
+
103
+ for ix, jx in itertools.product(range(m), range(m)):
104
+ x_has = (ix, jx) in x_edges
105
+
106
+ if y_has and not x_has: # deletion
107
+ cost = self.edge_del_cost
108
+ elif not y_has and x_has: # insertion
109
+ cost = self.edge_ins_cost
110
+ else: # no op
111
+ continue
112
+
113
+ for r, c in ((iy, ix), (iy, jx), (jy, ix), (jy, jx)):
114
+ b[r, c] = min(b[r, c], cost)
115
+
116
+ try:
117
+ res = quadratic_assignment(a, b, method="faq")
118
+ except ValueError as e:
119
+ logger.warning(f"Failed to compute QAP mapping for two graphs: {e}")
120
+
121
+ return GraphSim(
122
+ 0.0,
123
+ frozendict(),
124
+ frozendict(),
125
+ frozendict(),
126
+ frozendict(),
127
+ )
128
+
129
+ # only consider substitutions of real nodes
130
+ node_mapping = frozendict(
131
+ (idx2y[row], idx2x[col])
132
+ for row, col in enumerate(res.col_ind)
133
+ if row < n and col < m and (idx2y[row], idx2x[col]) in node_pair_sims
134
+ )
135
+
136
+ edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
137
+
138
+ return self.similarity(
139
+ x,
140
+ y,
141
+ node_mapping,
142
+ edge_mapping,
143
+ node_pair_sims,
144
+ edge_pair_sims,
145
+ )
@@ -1,4 +1,5 @@
1
1
  import itertools
2
+ from abc import ABC, abstractmethod
2
3
  from dataclasses import dataclass
3
4
  from typing import override
4
5
 
@@ -11,27 +12,31 @@ from .common import BaseGraphSimFunc, GraphSim
11
12
 
12
13
  with optional_dependencies():
13
14
  import rustworkx
15
+ from networkx.algorithms.isomorphism import DiGraphMatcher
14
16
 
15
- from ...model.graph import to_rustworkx_with_lookup
17
+ from ...model.graph import to_networkx, to_rustworkx_with_lookup
16
18
 
17
19
 
18
20
  @dataclass(slots=True)
19
- class vf2[K, N, E, G](
20
- BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
21
+ class VF2Base[K, N, E, G](
22
+ ABC, BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
21
23
  ):
22
24
  """Compute subgraph isomorphisms between two graphs.
23
25
 
24
- - Convert the input graphs to Rustworkx graphs.
25
26
  - Compute all possible subgraph isomorphisms between the two graphs.
26
27
  - For each isomorphism, compute the global similarity.
27
28
  - Return the isomorphism mapping with the highest similarity.
28
29
  """
29
30
 
30
- id_order: bool = False
31
- subgraph: bool = True
32
- induced: bool = False
33
- call_limit: int | None = None
34
31
  max_iterations: int = 0
32
+ maximum_common_subgraph: bool = True
33
+
34
+ @abstractmethod
35
+ def node_mappings(
36
+ self,
37
+ x: Graph[K, N, E, G],
38
+ y: Graph[K, N, E, G],
39
+ ) -> list[frozendict[K, K]]: ...
35
40
 
36
41
  @override
37
42
  def __call__(
@@ -39,6 +44,77 @@ class vf2[K, N, E, G](
39
44
  x: Graph[K, N, E, G],
40
45
  y: Graph[K, N, E, G],
41
46
  ) -> GraphSim[K]:
47
+ node_mappings: list[frozendict[K, K]] = []
48
+ next_permutations: list[Graph] = [y]
49
+
50
+ while next_permutations and not node_mappings:
51
+ current_permutations = next_permutations
52
+ next_permutations = []
53
+
54
+ for current_permutation in current_permutations:
55
+ node_mappings.extend(self.node_mappings(x, current_permutation))
56
+
57
+ if self.maximum_common_subgraph:
58
+ # remove nodes from y to determine partial mappings
59
+ next_permutations.extend(
60
+ Graph(
61
+ nodes=frozendict(
62
+ (k, v)
63
+ for k, v in current_permutation.nodes.items()
64
+ if k != node_key
65
+ ),
66
+ edges=frozendict(
67
+ (k, v)
68
+ for k, v in current_permutation.edges.items()
69
+ if v.source.key != node_key and v.target.key != node_key
70
+ ),
71
+ value=current_permutation.value,
72
+ )
73
+ for node_key in current_permutation.nodes.keys()
74
+ )
75
+
76
+ graph_sims: list[GraphSim[K]] = []
77
+
78
+ for node_mapping in node_mappings:
79
+ edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
80
+ node_pair_sims, edge_pair_sims = self.pair_similarities(
81
+ x, y, list(node_mapping.items()), list(edge_mapping.items())
82
+ )
83
+ graph_sims.append(
84
+ self.similarity(
85
+ x,
86
+ y,
87
+ node_mapping,
88
+ edge_mapping,
89
+ node_pair_sims,
90
+ edge_pair_sims,
91
+ )
92
+ )
93
+
94
+ return max(
95
+ graph_sims,
96
+ key=lambda sim: sim.value,
97
+ default=GraphSim(
98
+ 0.0,
99
+ frozendict(),
100
+ frozendict(),
101
+ frozendict(),
102
+ frozendict(),
103
+ ),
104
+ )
105
+
106
+
107
+ @dataclass(slots=True)
108
+ class vf2_rustworkx[K, N, E, G](VF2Base):
109
+ id_order: bool = False
110
+ induced: bool = False
111
+ call_limit: int | None = None
112
+
113
+ def node_mappings(
114
+ self,
115
+ x: Graph[K, N, E, G],
116
+ y: Graph[K, N, E, G],
117
+ ) -> list[frozendict[K, K]]:
42
118
  if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
43
119
  larger_graph, larger_graph_lookup = to_rustworkx_with_lookup(y)
44
120
  smaller_graph, smaller_graph_lookup = to_rustworkx_with_lookup(x)
@@ -59,14 +135,13 @@ class vf2[K, N, E, G](
59
135
  smaller_graph,
60
136
  node_matcher=node_matcher,
61
137
  edge_matcher=edge_matcher,
138
+ subgraph=True,
62
139
  id_order=self.id_order,
63
- subgraph=self.subgraph,
64
140
  induced=self.induced,
65
141
  call_limit=self.call_limit,
66
142
  )
67
143
 
68
144
  node_mappings: list[frozendict[K, K]] = []
69
- graph_sims: list[GraphSim[K]] = []
70
145
 
71
146
  for idx in itertools.count():
72
147
  if self.max_iterations > 0 and idx >= self.max_iterations:
@@ -98,30 +173,63 @@ class vf2[K, N, E, G](
98
173
  except StopIteration:
99
174
  break
100
175
 
101
- for node_mapping in node_mappings:
102
- edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
103
- node_pair_sims, edge_pair_sims = self.pair_similarities(
104
- x, y, list(node_mapping.items()), list(edge_mapping.items())
105
- )
106
- graph_sims.append(
107
- self.similarity(
108
- x,
109
- y,
110
- node_mapping,
111
- edge_mapping,
112
- node_pair_sims,
113
- edge_pair_sims,
114
- )
115
- )
176
+ return node_mappings
116
177
 
117
- return max(
118
- graph_sims,
119
- key=lambda sim: sim.value,
120
- default=GraphSim(
121
- 0.0,
122
- frozendict(),
123
- frozendict(),
124
- frozendict(),
125
- frozendict(),
126
- ),
178
+
179
+ @dataclass(slots=True)
180
+ class vf2_networkx[K, N, E, G](VF2Base):
181
+ def node_mappings(
182
+ self,
183
+ x: Graph[K, N, E, G],
184
+ y: Graph[K, N, E, G],
185
+ ) -> list[frozendict[K, K]]:
186
+ if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
187
+ larger_graph = to_networkx(y)
188
+ smaller_graph = to_networkx(x)
189
+ node_matcher = reverse_positional(self.node_matcher)
190
+ edge_matcher = reverse_positional(self.edge_matcher)
191
+ else:
192
+ larger_graph = to_networkx(x)
193
+ smaller_graph = to_networkx(y)
194
+ node_matcher = self.node_matcher
195
+ edge_matcher = self.edge_matcher
196
+
197
+ # `first` must be the larger graph and `second` the smaller one.
198
+ graph_matcher = DiGraphMatcher(
199
+ larger_graph,
200
+ smaller_graph,
201
+ node_match=lambda x, y: node_matcher(x["value"], y["value"]),
202
+ edge_match=lambda x, y: edge_matcher(x["value"], y["value"]),
127
203
  )
204
+
205
+ mappings_iter = graph_matcher.subgraph_isomorphisms_iter()
206
+ node_mappings: list[frozendict[K, K]] = []
207
+
208
+ for idx in itertools.count():
209
+ if self.max_iterations > 0 and idx >= self.max_iterations:
210
+ break
211
+
212
+ try:
213
+ if len(y.nodes) + len(y.edges) > len(x.nodes) + len(x.edges):
214
+ # y -> x (as needed)
215
+ node_mappings.append(
216
+ frozendict(
217
+ (larger_idx, smaller_idx)
218
+ for larger_idx, smaller_idx in next(mappings_iter).items()
219
+ )
220
+ )
221
+ else:
222
+ # x -> y (needs to be inverted)
223
+ node_mappings.append(
224
+ frozendict(
225
+ (smaller_idx, larger_idx)
226
+ for larger_idx, smaller_idx in next(mappings_iter).items()
227
+ )
228
+ )
229
+ except StopIteration:
230
+ break
231
+
232
+ return node_mappings
233
+
234
+
235
+ vf2 = vf2_rustworkx
@@ -1,56 +0,0 @@
1
- import itertools
2
- from collections.abc import Sequence
3
- from dataclasses import dataclass
4
-
5
- from ...model.graph import (
6
- Edge,
7
- Graph,
8
- Node,
9
- )
10
- from ...typing import BatchSimFunc
11
- from .common import BaseGraphSimFunc, SemanticEdgeSim
12
-
13
-
14
- @dataclass(slots=True)
15
- class precompute[K, N, E, G](
16
- BaseGraphSimFunc[K, N, E, G], BatchSimFunc[Graph[K, N, E, G], float]
17
- ):
18
- precompute_nodes: bool = True
19
- precompute_edges: bool = True
20
-
21
- def __call__(
22
- self, batches: Sequence[tuple[Graph[K, N, E, G], Graph[K, N, E, G]]]
23
- ) -> list[float]:
24
- if self.precompute_nodes:
25
- node_pairs: list[tuple[Node[K, N], Node[K, N]]] = []
26
-
27
- for x, y in batches:
28
- node_pairs.extend(
29
- (x_node, y_node)
30
- for x_node, y_node in itertools.product(
31
- x.nodes.values(), y.nodes.values()
32
- )
33
- if self.node_matcher(x_node.value, y_node.value)
34
- )
35
-
36
- self.batch_node_sim_func(node_pairs)
37
-
38
- if self.precompute_edges and not isinstance(
39
- self.batch_edge_sim_func, SemanticEdgeSim
40
- ):
41
- edge_pairs: list[tuple[Edge[K, N, E], Edge[K, N, E]]] = []
42
-
43
- for x, y in batches:
44
- edge_pairs.extend(
45
- (x_edge, y_edge)
46
- for x_edge, y_edge in itertools.product(
47
- x.edges.values(), y.edges.values()
48
- )
49
- if self.edge_matcher(x_edge.value, y_edge.value)
50
- and self.node_matcher(x_edge.source.value, y_edge.source.value)
51
- and self.node_matcher(x_edge.target.value, y_edge.target.value)
52
- )
53
-
54
- self.batch_edge_sim_func(edge_pairs)
55
-
56
- return [1.0] * len(batches)
@@ -1,118 +0,0 @@
1
- import itertools
2
- from dataclasses import dataclass
3
-
4
- import numpy as np
5
- from frozendict import frozendict
6
- from scipy.optimize import quadratic_assignment
7
-
8
- from ...helpers import get_logger
9
- from ...model.graph import (
10
- Graph,
11
- )
12
- from ...typing import SimFunc
13
- from .common import BaseGraphSimFunc, GraphSim
14
-
15
- logger = get_logger(__name__)
16
-
17
- __all__ = ["qap"]
18
-
19
-
20
- # https://jack.valmadre.net/notes/2020/12/08/non-perfect-linear-assignment/
21
- @dataclass(slots=True)
22
- class qap[K, N, E, G](
23
- BaseGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
24
- ):
25
- """Quadratic Assignment Problem (QAP) solver for graph similarity
26
-
27
- Currently not functional, the generated mappings are not correct.
28
- """
29
-
30
- def __call__(
31
- self,
32
- x: Graph[K, N, E, G],
33
- y: Graph[K, N, E, G],
34
- ) -> GraphSim[K]:
35
- node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
36
-
37
- n = len(y.nodes)
38
- m = len(x.nodes)
39
- dim = n + m
40
- a = np.zeros((dim, dim), dtype=float)
41
- b = np.zeros((dim, dim), dtype=float)
42
-
43
- y2idx = {k: i for i, k in enumerate(y.nodes)}
44
- x2idx = {k: i for i, k in enumerate(x.nodes)}
45
- idx2y = {i: k for k, i in y2idx.items()}
46
- idx2x = {i: k for k, i in x2idx.items()}
47
-
48
- # put 1 on every real-node loop of a
49
- # encode substitution / deletion cost on the corresponding loop of b
50
- for i in idx2y.keys():
51
- a[i, i] = 1.0 # selector
52
- b[m + i, m + i] = 1.0 # deletion
53
-
54
- for j in idx2x.keys():
55
- b[j, j] = 1.0 # selector
56
- a[n + j, n + j] = 1.0 # insertion
57
-
58
- # substitution cost (real-real loops)
59
- for (y_key, i), (x_key, j) in itertools.product(y2idx.items(), x2idx.items()):
60
- b[i, j] = (
61
- 1 - node_pair_sims[(y_key, x_key)]
62
- if (y_key, x_key) in node_pair_sims
63
- else 1e9
64
- )
65
-
66
- # real edge in y, deletion cost when mapped to two dummies
67
- for e in y.edges.values():
68
- i, j = y2idx[e.source.key], y2idx[e.target.key]
69
- b[m + i, m + j] = 1
70
- b[m + j, m + i] = 1
71
-
72
- # real edge in x, insertion cost when mapped from two dummies
73
- for e in x.edges.values():
74
- i, j = x2idx[e.source.key], x2idx[e.target.key]
75
- a[n + i, n + j] = 1
76
- a[n + j, n + i] = 1
77
-
78
- # real-real pairs, substitution cost
79
- for y_edge, x_edge in itertools.product(y.edges.values(), x.edges.values()):
80
- i, j = x2idx[x_edge.source.key], x2idx[x_edge.target.key]
81
- b[i, j] = (
82
- 1 - edge_pair_sims[(y_edge.key, x_edge.key)]
83
- if (y_edge.key, x_edge.key) in edge_pair_sims
84
- else 1e9
85
- )
86
-
87
- try:
88
- res = quadratic_assignment(a, b, method="faq")
89
- except ValueError as e:
90
- logger.warning(f"Failed to compute QAP mapping for two graphs: {e}")
91
-
92
- return GraphSim(
93
- 0.0,
94
- frozendict(),
95
- frozendict(),
96
- frozendict(),
97
- frozendict(),
98
- )
99
-
100
- # only consider substitutions of real nodes
101
- node_mapping = frozendict(
102
- (idx2y[y_idx], idx2x[x_idx])
103
- for y_idx, x_idx in enumerate(res.col_ind)
104
- if y_idx < n
105
- and x_idx < m
106
- and (idx2y[y_idx], idx2x[x_idx]) in node_pair_sims
107
- )
108
-
109
- edge_mapping = self.induced_edge_mapping(x, y, node_mapping)
110
-
111
- return self.similarity(
112
- x,
113
- y,
114
- node_mapping,
115
- edge_mapping,
116
- node_pair_sims,
117
- edge_pair_sims,
118
- )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes