cbrkit 0.18.2__tar.gz → 0.19.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {cbrkit-0.18.2 → cbrkit-0.19.0}/PKG-INFO +1 -1
  2. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/eval/_common.py +25 -25
  3. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/generic.py +1 -0
  4. cbrkit-0.19.0/cbrkit/sim/graphs/__init__.py +5 -0
  5. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/graphs/_astar.py +3 -22
  6. cbrkit-0.19.0/cbrkit/sim/graphs/_isomorphism.py +104 -0
  7. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/graphs/_model.py +68 -38
  8. {cbrkit-0.18.2 → cbrkit-0.19.0}/pyproject.toml +1 -1
  9. cbrkit-0.18.2/cbrkit/sim/graphs/__init__.py +0 -8
  10. {cbrkit-0.18.2 → cbrkit-0.19.0}/LICENSE +0 -0
  11. {cbrkit-0.18.2 → cbrkit-0.19.0}/README.md +0 -0
  12. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/__init__.py +0 -0
  13. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/__main__.py +0 -0
  14. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/adaptation.py +0 -0
  15. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/api.py +0 -0
  16. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/cli.py +0 -0
  17. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/eval/__init__.py +0 -0
  18. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/eval/_retrieval.py +0 -0
  19. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/helpers.py +0 -0
  20. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/loaders.py +0 -0
  21. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/py.typed +0 -0
  22. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/retrieval.py +0 -0
  23. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/__init__.py +0 -0
  24. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/_aggregator.py +0 -0
  25. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/_attribute_value.py +0 -0
  26. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/collections.py +0 -0
  27. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/numbers.py +0 -0
  28. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/strings/__init__.py +0 -0
  29. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/sim/strings/taxonomy.py +0 -0
  30. {cbrkit-0.18.2 → cbrkit-0.19.0}/cbrkit/typing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cbrkit
3
- Version: 0.18.2
3
+ Version: 0.19.0
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
5
5
  Home-page: https://wi2trier.github.io/cbrkit/
6
6
  License: MIT
@@ -1,4 +1,3 @@
1
- import itertools
2
1
  import statistics
3
2
  import warnings
4
3
  from collections.abc import Iterable, Mapping, Sequence
@@ -34,36 +33,37 @@ def _correctness_completeness_single(
34
33
  k: int | None,
35
34
  ) -> tuple[float, float]:
36
35
  sorted_run = sorted(run.items(), key=lambda x: x[1], reverse=True)
37
- run_ranking = {x[0]: i + 1 for i, x in enumerate(sorted_run[:k])}
36
+ run_k = {x[0]: x[1] for x in sorted_run[:k]}
38
37
 
39
- orders = 0
40
- concordances = 0
41
- disconcordances = 0
38
+ concordant_pairs = 0
39
+ discordant_pairs = 0
40
+ total_pairs = 0
42
41
 
43
- correctness = 1
44
- completeness = 1
42
+ case_keys = list(qrel.keys())
45
43
 
46
- for (user_key_1, user_rank_1), (user_key_2, user_rank_2) in itertools.product(
47
- qrel.items(), qrel.items()
48
- ):
49
- if user_key_1 != user_key_2 and user_rank_1 > user_rank_2:
50
- orders += 1
44
+ for i in range(len(case_keys)):
45
+ for j in range(i + 1, len(case_keys)):
46
+ idx1, idx2 = case_keys[i], case_keys[j]
47
+ total_pairs += 1
51
48
 
52
- system_rank_1 = run_ranking.get(user_key_1)
53
- system_rank_2 = run_ranking.get(user_key_2)
49
+ if idx1 in run_k and idx2 in run_k:
50
+ qrel1, qrel2 = qrel[idx1], qrel[idx2]
51
+ run1, run2 = run_k[idx1], run_k[idx2]
54
52
 
55
- if system_rank_1 is not None and system_rank_2 is not None:
56
- if system_rank_1 > system_rank_2:
57
- concordances += 1
58
- elif system_rank_1 < system_rank_2:
59
- disconcordances += 1
53
+ if (qrel1 < qrel2 and run1 < run2) or (qrel1 > qrel2 and run1 > run2):
54
+ concordant_pairs += 1
55
+ elif qrel1 != qrel2:
56
+ discordant_pairs += 1
60
57
 
61
- if concordances + disconcordances > 0:
62
- correctness = (concordances - disconcordances) / (
63
- concordances + disconcordances
64
- )
65
- if orders > 0:
66
- completeness = (concordances + disconcordances) / orders
58
+ correctness = (
59
+ (concordant_pairs - discordant_pairs) / (concordant_pairs + discordant_pairs)
60
+ if (concordant_pairs + discordant_pairs) > 0
61
+ else 0.0
62
+ )
63
+
64
+ completeness = (
65
+ (concordant_pairs + discordant_pairs) / total_pairs if total_pairs > 0 else 0.0
66
+ )
67
67
 
68
68
  return correctness, completeness
69
69
 
@@ -109,6 +109,7 @@ class dynamic_table[K, V, S: Float](SimSeqFunc[V, S], SupportsMetadata):
109
109
  entries: Sequence[tuple[a, b, sim(a, b)]]
110
110
  symmetric: If True, the table is assumed to be symmetric, i.e. sim(a, b) = sim(b, a)
111
111
  default: Default similarity value for pairs not in the table
112
+ key_getter: A function that extracts the key for lookup from the input values
112
113
 
113
114
  Examples:
114
115
  >>> sim = dynamic_table(
@@ -0,0 +1,5 @@
1
+ from . import _model as model
2
+ from ._astar import astar
3
+ from ._isomorphism import isomorphism
4
+
5
+ __all__ = ["model", "astar", "isomorphism"]
@@ -5,23 +5,22 @@ import itertools
5
5
  import random
6
6
  from collections.abc import Iterable, Sequence
7
7
  from dataclasses import dataclass, field
8
- from typing import Any, Literal, Protocol, cast, override
8
+ from typing import Literal, Protocol, override
9
9
 
10
10
  from cbrkit.helpers import (
11
11
  SimSeqWrapper,
12
- SimWrapper,
13
12
  get_metadata,
14
13
  unpack_sim,
15
14
  unpack_sims,
16
15
  )
17
16
  from cbrkit.sim.graphs._model import (
17
+ DataSimWrapper,
18
18
  Edge,
19
19
  Graph,
20
- HasData,
20
+ GraphSim,
21
21
  Node,
22
22
  )
23
23
  from cbrkit.typing import (
24
- AnnotatedFloat,
25
24
  AnySimFunc,
26
25
  Float,
27
26
  JsonDict,
@@ -33,13 +32,6 @@ from cbrkit.typing import (
33
32
  type ElementKind = Literal["node", "edge"]
34
33
 
35
34
 
36
- @dataclass(slots=True, frozen=True)
37
- class GraphSim[K](AnnotatedFloat):
38
- value: float
39
- node_mappings: dict[K, K]
40
- edge_mappings: dict[K, K]
41
-
42
-
43
35
  @dataclass(slots=True, frozen=True)
44
36
  class SelectionResult[K]:
45
37
  query_element: K
@@ -195,17 +187,6 @@ class default_edge_sim[K, N, E](SimSeqFunc[Edge[K, N, E], Float]):
195
187
  ]
196
188
 
197
189
 
198
- class DataSimWrapper[V: HasData[Any], S: Float](SimWrapper, SimSeqFunc[V, S]):
199
- @override
200
- def __call__(self, pairs: Sequence[tuple[V, V]]) -> Sequence[S]:
201
- if self.kind == "pair":
202
- func = cast(SimPairFunc[V, S], self.func)
203
- return [func(x.data, y.data) for (x, y) in pairs]
204
-
205
- func = cast(SimSeqFunc[V, S], self.func)
206
- return func([(x.data, y.data) for x, y in pairs])
207
-
208
-
209
190
  @dataclass(slots=True)
210
191
  class astar[K, N, E, G](
211
192
  SimPairFunc[Graph[K, N, E, G], GraphSim[K]],
@@ -0,0 +1,104 @@
1
+ from collections.abc import Callable
2
+ from dataclasses import dataclass
3
+ from typing import Any, override
4
+
5
+ from cbrkit.helpers import SimSeqWrapper
6
+ from cbrkit.typing import (
7
+ AggregatorFunc,
8
+ AnySimFunc,
9
+ Float,
10
+ SimPairFunc,
11
+ SimSeqFunc,
12
+ SupportsMetadata,
13
+ )
14
+
15
+ from ._model import DataSimWrapper, Graph, GraphSim, Node, to_rustworkx_with_lookup
16
+
17
+
18
+ @dataclass(slots=True)
19
+ class isomorphism[K, N, E, G, S: Float](
20
+ SimPairFunc[Graph[K, N, E, G], GraphSim[K]],
21
+ SupportsMetadata,
22
+ ):
23
+ """Compute subgraph isomorphisms between two graphs.
24
+
25
+ - Convert the input graphs to Rustworkx graphs.
26
+ - Compute all possible subgraph isomorphisms between the two graphs.
27
+ - For each isomorphism, compute the similarity based on the node mapping.
28
+ - Return the isomorphism mapping with the highest similarity.
29
+ """
30
+
31
+ node_matcher: Callable[[N, N], bool]
32
+ edge_matcher: Callable[[E, E], bool]
33
+ node_sim_func: SimSeqFunc[Node[K, N], S]
34
+ aggregator: AggregatorFunc[Any, S]
35
+
36
+ def __init__(
37
+ self,
38
+ node_matcher: Callable[[N, N], bool],
39
+ edge_matcher: Callable[[E, E], bool],
40
+ aggregator: AggregatorFunc[Any, S],
41
+ node_obj_sim: AnySimFunc[Node[K, N], Float] | None = None,
42
+ node_data_sim: AnySimFunc[N, Float] | None = None,
43
+ ) -> None:
44
+ # verify that only one of the object or data similarity functions is provided
45
+ if node_obj_sim and node_data_sim:
46
+ raise ValueError(
47
+ "Only one of the object or data similarity functions can be provided for nodes"
48
+ )
49
+
50
+ if node_data_sim:
51
+ self.node_sim_func = DataSimWrapper(node_data_sim)
52
+ elif node_obj_sim:
53
+ self.node_sim_func = SimSeqWrapper(node_obj_sim)
54
+ else:
55
+ raise ValueError("Either node_obj_sim or node_data_sim must be provided")
56
+
57
+ self.node_matcher = node_matcher
58
+ self.edge_matcher = edge_matcher
59
+ self.aggregator = aggregator
60
+
61
+ @override
62
+ def __call__(
63
+ self,
64
+ x: Graph[K, N, E, G],
65
+ y: Graph[K, N, E, G],
66
+ ) -> GraphSim[K]:
67
+ import rustworkx
68
+
69
+ x_rw, x_lookup = to_rustworkx_with_lookup(x)
70
+ y_rw, y_lookup = to_rustworkx_with_lookup(y)
71
+
72
+ rw_mappings = rustworkx.vf2_mapping(
73
+ y_rw,
74
+ x_rw,
75
+ subgraph=True,
76
+ node_matcher=self.node_matcher,
77
+ edge_matcher=self.edge_matcher,
78
+ )
79
+
80
+ node_mappings: list[dict[K, K]] = [
81
+ {y_lookup[y_key]: x_lookup[x_key] for y_key, x_key in mapping.items()}
82
+ for mapping in rw_mappings
83
+ ]
84
+
85
+ if len(node_mappings) == 0:
86
+ return GraphSim(0.0, node_mappings={}, edge_mappings={})
87
+
88
+ mapping_similarities: list[float] = []
89
+
90
+ for node_mapping in node_mappings:
91
+ node_pairs = [
92
+ (x.nodes[x_key], y.nodes[y_key])
93
+ for y_key, x_key in node_mapping.items()
94
+ ]
95
+ node_similarities = self.node_sim_func(node_pairs)
96
+ mapping_similarities.append(self.aggregator(node_similarities))
97
+
98
+ best_mapping_id, best_sim = max(
99
+ enumerate(mapping_similarities),
100
+ key=lambda x: x[1],
101
+ )
102
+ best_mapping = node_mappings[best_mapping_id]
103
+
104
+ return GraphSim(best_sim, node_mappings=best_mapping, edge_mappings={})
@@ -1,9 +1,19 @@
1
- from collections.abc import Mapping
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping, Sequence
2
4
  from dataclasses import dataclass
3
- from typing import Any, Protocol, TypedDict
5
+ from typing import Any, Protocol, TypedDict, cast, override
4
6
 
5
7
  import immutables
6
8
 
9
+ from cbrkit.helpers import SimWrapper
10
+ from cbrkit.typing import (
11
+ AnnotatedFloat,
12
+ Float,
13
+ SimPairFunc,
14
+ SimSeqFunc,
15
+ )
16
+
7
17
  __all__ = [
8
18
  "Node",
9
19
  "Edge",
@@ -16,6 +26,24 @@ __all__ = [
16
26
  ]
17
27
 
18
28
 
29
+ @dataclass(slots=True, frozen=True)
30
+ class GraphSim[K](AnnotatedFloat):
31
+ value: float
32
+ node_mappings: dict[K, K]
33
+ edge_mappings: dict[K, K]
34
+
35
+
36
+ class DataSimWrapper[V: HasData[Any], S: Float](SimWrapper, SimSeqFunc[V, S]):
37
+ @override
38
+ def __call__(self, pairs: Sequence[tuple[V, V]]) -> Sequence[S]:
39
+ if self.kind == "pair":
40
+ func = cast(SimPairFunc[V, S], self.func)
41
+ return [func(x.data, y.data) for (x, y) in pairs]
42
+
43
+ func = cast(SimSeqFunc[V, S], self.func)
44
+ return func([(x.data, y.data) for x, y in pairs])
45
+
46
+
19
47
  class HasData[T](Protocol):
20
48
  data: T
21
49
 
@@ -49,7 +77,7 @@ class Node[K, N](HasData[N]):
49
77
  cls,
50
78
  key: K,
51
79
  data: SerializedNode[N],
52
- ) -> "Node[K, N]":
80
+ ) -> Node[K, N]:
53
81
  return cls(key, data["data"])
54
82
 
55
83
 
@@ -73,7 +101,7 @@ class Edge[K, N, E](HasData[E]):
73
101
  key: K,
74
102
  data: SerializedEdge[K, E],
75
103
  nodes: Mapping[K, Node[K, N]],
76
- ) -> "Edge[K, N, E]":
104
+ ) -> Edge[K, N, E]:
77
105
  return cls(
78
106
  key,
79
107
  nodes[data["source"]],
@@ -99,7 +127,7 @@ class Graph[K, N, E, G](HasData[G]):
99
127
  def from_dict(
100
128
  cls,
101
129
  g: SerializedGraph[K, N, E, G],
102
- ) -> "Graph[K, N, E, G]":
130
+ ) -> Graph[K, N, E, G]:
103
131
  nodes = immutables.Map(
104
132
  (key, Node.from_dict(key, value)) for key, value in g["nodes"].items()
105
133
  )
@@ -121,7 +149,9 @@ def from_dict[K, N, E, G](g: SerializedGraph[K, N, E, G]) -> Graph[K, N, E, G]:
121
149
  try:
122
150
  import rustworkx
123
151
 
124
- def to_rustworkx[N, E](g: Graph[Any, N, E, Any]) -> "rustworkx.PyDiGraph[N, E]":
152
+ def to_rustworkx_with_lookup[K, N, E](
153
+ g: Graph[K, N, E, Any],
154
+ ) -> tuple[rustworkx.PyDiGraph[N, E], dict[int, K]]:
125
155
  ng = rustworkx.PyDiGraph(attrs=g.data)
126
156
  new_ids = ng.add_nodes_from(list(g.nodes.values()))
127
157
  id_map = {
@@ -139,19 +169,18 @@ try:
139
169
  ]
140
170
  )
141
171
 
142
- return ng
172
+ return ng, {new_id: old_id for old_id, new_id in id_map.items()}
143
173
 
144
- def from_rustworkx[N, E](g: "rustworkx.PyDiGraph[N, E]") -> Graph[int, N, E, Any]:
174
+ def to_rustworkx[N, E](g: Graph[Any, N, E, Any]) -> rustworkx.PyDiGraph[N, E]:
175
+ return to_rustworkx_with_lookup(g)[0]
176
+
177
+ def from_rustworkx[N, E](g: rustworkx.PyDiGraph[N, E]) -> Graph[int, N, E, Any]:
145
178
  nodes = immutables.Map(
146
179
  (idx, Node(idx, g.get_node_data(idx))) for idx in g.node_indices()
147
180
  )
148
181
  edges = immutables.Map(
149
182
  (edge_id, Edge(edge_id, nodes[source_id], nodes[target_id], edge_data))
150
- for edge_id, (
151
- source_id,
152
- target_id,
153
- edge_data,
154
- ) in g.edge_index_map().items()
183
+ for edge_id, (source_id, target_id, edge_data) in g.edge_index_map().items()
155
184
  )
156
185
 
157
186
  return Graph(nodes, edges, g.attrs)
@@ -164,43 +193,44 @@ except ImportError:
164
193
  try:
165
194
  import networkx as nx
166
195
 
167
- def to_networkx[N, E](g: Graph[Any, N, E, Any]) -> "nx.DiGraph":
196
+ def to_networkx(g: Graph) -> nx.DiGraph:
168
197
  ng = nx.DiGraph()
169
- # Set graph attributes
170
- ng.graph.update(g.data)
198
+ ng.graph = g.data
171
199
 
172
- # Add nodes with their data
173
- for node in g.nodes.values():
174
- ng.add_node(node.key, data=node.data)
200
+ ng.add_nodes_from(
201
+ (
202
+ node.key,
203
+ (node.data if isinstance(node.data, Mapping) else {"data": node.data}),
204
+ )
205
+ for node in g.nodes
206
+ )
175
207
 
176
- # Add edges with their data
177
- for edge in g.edges.values():
178
- ng.add_edge(edge.source.key, edge.target.key, key=edge.key, data=edge.data)
208
+ ng.add_edges_from(
209
+ (
210
+ edge.source.key,
211
+ edge.target.key,
212
+ (
213
+ {**edge.data, "key": edge.key}
214
+ if isinstance(edge.data, Mapping)
215
+ else {"data": edge.data, "key": edge.key}
216
+ ),
217
+ )
218
+ for edge in g.edges.values()
219
+ )
179
220
 
180
221
  return ng
181
222
 
182
- def from_networkx[N, E](g: "nx.DiGraph") -> Graph[Any, N, E, Any]:
183
- # Create nodes
223
+ def from_networkx(g: nx.DiGraph) -> Graph:
184
224
  nodes = immutables.Map(
185
- (node_id, Node(node_id, g.nodes[node_id].get("data")))
186
- for node_id in g.nodes
225
+ (idx, Node(idx, data)) for idx, data in g.nodes(data=True)
187
226
  )
188
227
 
189
- # Create edges
190
228
  edges = immutables.Map(
191
- (
192
- edge_data.get("key", idx),
193
- Edge(
194
- edge_data.get("key", idx),
195
- nodes[source],
196
- nodes[target],
197
- edge_data.get("data"),
198
- ),
199
- )
200
- for idx, (source, target, edge_data) in enumerate(g.edges(data=True))
229
+ (idx, Edge(idx, nodes[source_id], nodes[target_id], edge_data))
230
+ for idx, (source_id, target_id, edge_data) in enumerate(g.edges(data=True))
201
231
  )
202
232
 
203
- return Graph(nodes, edges, dict(g.graph))
233
+ return Graph(nodes, edges, g.graph)
204
234
 
205
235
  __all__ += ["to_networkx", "from_networkx"]
206
236
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cbrkit"
3
- version = "0.18.2"
3
+ version = "0.19.0"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
5
5
  authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
6
6
  license = "MIT"
@@ -1,8 +0,0 @@
1
- from . import _model as model
2
- from ._astar import GraphSim, astar
3
-
4
- __all__ = [
5
- "model",
6
- "GraphSim",
7
- "astar",
8
- ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes