PyPI - cbrkit - Versions diffs - 0.26.0__tar.gz → 0.26.1__tar.gz - Mend

cbrkit 0.26.0.tar.gz → 0.26.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

{cbrkit-0.26.0 → cbrkit-0.26.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: cbrkit
-Version: 0.26.0
+Version: 0.26.1
 Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
 Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
 Author: Mirko Lenz

{cbrkit-0.26.0 → cbrkit-0.26.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "cbrkit"
-version = "0.26.0"
+version = "0.26.1"
 description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
 authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
 readme = "README.md"

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/retrieval/build.py RENAMED Viewed

@@ -2,7 +2,7 @@ import itertools
 from collections.abc import Sequence
 from dataclasses import dataclass
 from multiprocessing.pool import Pool
-from typing import Any, Literal, override
+from typing import Literal, override
 from ..helpers import (
     batchify_sim,
@@ -134,44 +134,59 @@ class combine[K, V, S: Float](RetrieverFunc[K, V, float]):
         A retriever function that combines the results from multiple retrievers.
     """
-    retriever_funcs: list[RetrieverFunc[K, V, S]]
-    aggregator: AggregatorFunc[Any, S] = default_aggregator
+    retriever_funcs: Sequence[RetrieverFunc[K, V, S]]
+    aggregator: AggregatorFunc[str, S] = default_aggregator
     strategy: Literal["intersection", "union"] = "union"
     @override
     def __call__(
         self, batches: Sequence[tuple[Casebase[K, V], V]]
     ) -> Sequence[SimMap[K, float]]:
-        results = [retriever_func(batches) for retriever_func in self.retriever_funcs]
-        return [
-            self.__call_batch__(
-                [
-                    results[retriever_idx][batch_idx]
-                    for retriever_idx in range(len(self.retriever_funcs))
-                ]
-            )
-            for batch_idx in range(len(batches))
-        ]
+        if isinstance(self.retriever_funcs, Sequence):
+            func_results = [
+                retriever_func(batches) for retriever_func in self.retriever_funcs
+            ]
+            return [
+                self.__call_batch__(
+                    [batch_results[batch_idx] for batch_results in func_results]
+                )
+                for batch_idx in range(len(batches))
+            ]
+        # elif isinstance(self.retriever_funcs, Mapping):
+        #     results = {
+        #         func_key: retriever_func(batches)
+        #         for func_key, retriever_func in self.retriever_funcs.items()
+        #     }
+        #     return [
+        #         self.__call_batch__(
+        #             {func_key: func_results[batch_idx] for func_key, func_results in results.items()}
+        #         )
+        #         for batch_idx in range(len(batches))
+        #     ]
+        raise ValueError(f"Invalid retriever_funcs type: {type(self.retriever_funcs)}")
-    def __call_batch__(self, results: list[SimMap[K, S]]) -> SimMap[K, float]:
+    def __call_batch__(self, results: Sequence[SimMap[K, S]]) -> SimMap[K, float]:
         if self.strategy == "intersection":
             return {
-                key: self.aggregator(
-                    [result[key] for result in results if key in result]
+                case_key: self.aggregator(
+                    [result[case_key] for result in results if case_key in result]
                 )
-                for key in set().intersection(
+                for case_key in set().intersection(
                     *[set(result.keys()) for result in results]
                 )
             }
         elif self.strategy == "union":
             return {
-                key: self.aggregator(
-                    [result[key] for result in results if key in result]
+                case_key: self.aggregator(
+                    [result[case_key] for result in results if case_key in result]
                 )
                 for result in results
-                for key in result.keys()
+                for case_key in result.keys()
             }
         raise ValueError(f"Unknown strategy: {self.strategy}")

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/sim/__init__.py RENAMED Viewed

@@ -15,6 +15,7 @@ from .attribute_value import AttributeValueSim, attribute_value
 from .wrappers import (
     attribute_table,
     cache,
+    combine,
     dynamic_table,
     table,
     transpose,
@@ -26,6 +27,7 @@ __all__ = [
     "transpose",
     "transpose_value",
     "cache",
+    "combine",
     "table",
     "dynamic_table",
     "type_table",

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/sim/graphs/__init__.py RENAMED Viewed

@@ -7,7 +7,10 @@ from .common import (
     GraphSim,
     SearchGraphSimFunc,
     SearchState,
+    SearchStateInit,
     SemanticEdgeSim,
+    init_empty,
+    init_unique_matches,
 )
 from .greedy import greedy
 from .lap import lap
@@ -33,10 +36,13 @@ __all__ = [
     "vf2",
     "dtw",
     "smith_waterman",
+    "init_empty",
+    "init_unique_matches",
     "GraphSim",
     "ElementMatcher",
     "SemanticEdgeSim",
     "BaseGraphSimFunc",
     "SearchGraphSimFunc",
     "SearchState",
+    "SearchStateInit",
 ]

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/sim/graphs/astar.py RENAMED Viewed

@@ -1,12 +1,8 @@
 import heapq
-import itertools
-from collections import defaultdict
 from collections.abc import Mapping
 from dataclasses import dataclass, field
 from typing import Protocol
-from frozendict import frozendict
 from ...helpers import (
     get_logger,
     unpack_float,
@@ -18,25 +14,20 @@ from ...model.graph import (
 )
 from ...typing import SimFunc
 from .common import (
-    ElementMatcher,
     GraphSim,
     SearchGraphSimFunc,
     SearchState,
-    _induced_edge_mapping,
 )
 __all__ = [
     "HeuristicFunc",
     "SelectionFunc",
-    "InitFunc",
     "h1",
     "h2",
     "h3",
     "select1",
     "select2",
     "select3",
-    "init1",
-    "init2",
     "build",
 ]
@@ -74,17 +65,6 @@ class SelectionFunc[K, N, E, G](Protocol):
     ) -> None | tuple[K, GraphElementType]: ...
-class InitFunc[K, N, E, G](Protocol):
-    def __call__(
-        self,
-        x: Graph[K, N, E, G],
-        y: Graph[K, N, E, G],
-        node_matcher: ElementMatcher[N],
-        edge_matcher: ElementMatcher[E],
-        /,
-    ) -> SearchState[K]: ...
 @dataclass(slots=True, frozen=True)
 class h1[K, N, E, G](HeuristicFunc[K, N, E, G]):
     def __call__(
@@ -274,61 +254,6 @@ class select3[K, N, E, G](SelectionFunc[K, N, E, G]):
         return selection_key, selection_type
-@dataclass(slots=True, frozen=True)
-class init1[K, N, E, G](InitFunc[K, N, E, G]):
-    def __call__(
-        self,
-        x: Graph[K, N, E, G],
-        y: Graph[K, N, E, G],
-        node_matcher: ElementMatcher[N],
-        edge_matcher: ElementMatcher[E],
-    ) -> SearchState[K]:
-        return SearchState(
-            frozendict(),
-            frozendict(),
-            frozenset(y.nodes.keys()),
-            frozenset(y.edges.keys()),
-            frozenset(x.nodes.keys()),
-            frozenset(x.edges.keys()),
-        )
-@dataclass(slots=True, init=False)
-class init2[K, N, E, G](InitFunc[K, N, E, G]):
-    def __call__(
-        self,
-        x: Graph[K, N, E, G],
-        y: Graph[K, N, E, G],
-        node_matcher: ElementMatcher[N],
-        edge_matcher: ElementMatcher[E],
-    ) -> SearchState[K]:
-        # pre-populate the mapping with nodes/edges that only have one possible legal mapping
-        possible_node_mappings: defaultdict[K, set[K]] = defaultdict(set)
-        for y_key, x_key in itertools.product(y.nodes.keys(), x.nodes.keys()):
-            if node_matcher(x.nodes[x_key].value, y.nodes[y_key].value):
-                possible_node_mappings[y_key].add(x_key)
-        node_mapping: frozendict[K, K] = frozendict(
-            (y_key, next(iter(x_keys)))
-            for y_key, x_keys in possible_node_mappings.items()
-            if len(x_keys) == 1
-        )
-        edge_mapping: frozendict[K, K] = _induced_edge_mapping(
-            x, y, node_mapping, edge_matcher
-        )
-        return SearchState(
-            node_mapping,
-            edge_mapping,
-            frozenset(y.nodes.keys() - node_mapping.keys()),
-            frozenset(y.edges.keys() - edge_mapping.keys()),
-            frozenset(x.nodes.keys() - node_mapping.values()),
-            frozenset(x.edges.keys() - edge_mapping.values()),
-        )
 @dataclass(slots=True)
 class build[K, N, E, G](
     SearchGraphSimFunc[K, N, E, G], SimFunc[Graph[K, N, E, G], GraphSim[K]]
@@ -355,7 +280,6 @@ class build[K, N, E, G](
     heuristic_func: HeuristicFunc[K, N, E, G] = field(default_factory=h3)
     selection_func: SelectionFunc[K, N, E, G] = field(default_factory=select3)
-    init_func: InitFunc[K, N, E, G] = field(default_factory=init1)
     beam_width: int = 0
     pathlength_weight: int = 0
@@ -446,7 +370,7 @@ class build[K, N, E, G](
         node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
         open_set: list[PriorityState[K]] = []
-        best_state = self.init_func(x, y, self.node_matcher, self.edge_matcher)
+        best_state = self.init_search_state(x, y)
         heapq.heappush(open_set, PriorityState(0, best_state))
         while open_set:

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/sim/graphs/common.py RENAMED Viewed

@@ -1,7 +1,8 @@
 import itertools
+from collections import defaultdict
 from collections.abc import Mapping, Sequence
 from dataclasses import dataclass, field
-from typing import Any, Protocol
+from typing import Any, Protocol, cast
 from frozendict import frozendict
@@ -9,10 +10,11 @@ from ...helpers import (
     batchify_sim,
     reverse_batch_positional,
     reverse_positional,
+    total_params,
     unpack_float,
 )
 from ...model.graph import Edge, Graph, Node
-from ...typing import AnySimFunc, BatchSimFunc, Float, StructuredValue
+from ...typing import AnySimFunc, BatchSimFunc, Float, SimFunc, StructuredValue
 from ..wrappers import transpose_value
 type PairSim[K] = Mapping[tuple[K, K], float]
@@ -256,7 +258,113 @@ class SearchState[K]:
     open_x_edges: frozenset[K]
+class SearchStateInit[K, N, E, G](Protocol):
+    def __call__(
+        self,
+        x: Graph[K, N, E, G],
+        y: Graph[K, N, E, G],
+        node_matcher: ElementMatcher[N],
+        edge_matcher: ElementMatcher[E],
+        /,
+    ) -> SearchState[K]: ...
+@dataclass(slots=True, frozen=True)
+class init_empty[K, N, E, G](SearchStateInit[K, N, E, G]):
+    def __call__(
+        self,
+        x: Graph[K, N, E, G],
+        y: Graph[K, N, E, G],
+        node_matcher: ElementMatcher[N],
+        edge_matcher: ElementMatcher[E],
+    ) -> SearchState[K]:
+        return SearchState(
+            frozendict(),
+            frozendict(),
+            frozenset(y.nodes.keys()),
+            frozenset(y.edges.keys()),
+            frozenset(x.nodes.keys()),
+            frozenset(x.edges.keys()),
+        )
+@dataclass(slots=True, init=False)
+class init_unique_matches[K, N, E, G](SearchStateInit[K, N, E, G]):
+    def __call__(
+        self,
+        x: Graph[K, N, E, G],
+        y: Graph[K, N, E, G],
+        node_matcher: ElementMatcher[N],
+        edge_matcher: ElementMatcher[E],
+    ) -> SearchState[K]:
+        # pre-populate the mapping with nodes/edges that only have one possible legal mapping
+        possible_node_mappings: defaultdict[K, set[K]] = defaultdict(set)
+        for y_key, x_key in itertools.product(y.nodes.keys(), x.nodes.keys()):
+            if node_matcher(x.nodes[x_key].value, y.nodes[y_key].value):
+                possible_node_mappings[y_key].add(x_key)
+        node_mapping: frozendict[K, K] = frozendict(
+            (y_key, next(iter(x_keys)))
+            for y_key, x_keys in possible_node_mappings.items()
+            if len(x_keys) == 1
+        )
+        edge_mapping: frozendict[K, K] = _induced_edge_mapping(
+            x, y, node_mapping, edge_matcher
+        )
+        return SearchState(
+            node_mapping,
+            edge_mapping,
+            frozenset(y.nodes.keys() - node_mapping.keys()),
+            frozenset(y.edges.keys() - edge_mapping.keys()),
+            frozenset(x.nodes.keys() - node_mapping.values()),
+            frozenset(x.edges.keys() - edge_mapping.values()),
+        )
+@dataclass(slots=True)
 class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
+    init_func: (
+        SearchStateInit[K, N, E, G] | AnySimFunc[Graph[K, N, E, G], GraphSim[K]]
+    ) = field(default_factory=init_unique_matches)
+    def init_search_state(
+        self, x: Graph[K, N, E, G], y: Graph[K, N, E, G]
+    ) -> SearchState[K]:
+        init_func_params = total_params(self.init_func)
+        sim: GraphSim[K]
+        if init_func_params == 4:
+            init_func = cast(SearchStateInit[K, N, E, G], self.init_func)
+            return init_func(x, y, self.node_matcher, self.edge_matcher)
+        elif init_func_params == 2:
+            init_func = cast(SimFunc[Graph[K, N, E, G], GraphSim[K]], self.init_func)
+            sim = init_func(x, y)
+        elif init_func_params == 1:
+            init_func = cast(
+                BatchSimFunc[Graph[K, N, E, G], GraphSim[K]], self.init_func
+            )
+            sim = init_func([(x, y)])[0]
+        else:
+            raise ValueError(
+                f"Invalid number of parameters for init_func: {init_func_params}"
+            )
+        return SearchState(
+            node_mapping=sim.node_mapping,
+            edge_mapping=sim.edge_mapping,
+            open_y_nodes=frozenset(y.nodes.keys() - sim.node_mapping.keys()),
+            open_y_edges=frozenset(y.edges.keys() - sim.edge_mapping.keys()),
+            open_x_nodes=frozenset(x.nodes.keys() - sim.node_mapping.values()),
+            open_x_edges=frozenset(x.edges.keys() - sim.edge_mapping.values()),
+        )
     def finished(self, state: SearchState[K]) -> bool:
         # the following condition could save a few iterations, but needs to be tested
         # return (not state.open_y_nodes and not state.open_y_edges) or (

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/sim/graphs/greedy.py RENAMED Viewed

@@ -1,7 +1,5 @@
 from dataclasses import dataclass
-from frozendict import frozendict
 from ...helpers import (
     get_logger,
 )
@@ -41,20 +39,18 @@ class greedy[K, N, E, G](
         #     self_inv = dataclasses.replace(self, _invert=True)
         #     return self.invert_similarity(x, y, self_inv(x=y, y=x))
-        current_state = SearchState(
-            frozendict(),
-            frozendict(),
-            frozenset(y.nodes.keys()),
-            frozenset(y.edges.keys()),
-            frozenset(x.nodes.keys()),
-            frozenset(x.edges.keys()),
-        )
-        current_sim = GraphSim(
-            0.0, frozendict(), frozendict(), frozendict(), frozendict()
-        )
         node_pair_sims, edge_pair_sims = self.pair_similarities(x, y)
+        current_state = self.init_search_state(x, y)
+        current_sim = self.similarity(
+            x,
+            y,
+            current_state.node_mapping,
+            current_state.edge_mapping,
+            node_pair_sims,
+            edge_pair_sims,
+        )
         while not self.finished(current_state):
             # Iterate over all open pairs and find the best pair
             next_states: list[SearchState[K]] = []

{cbrkit-0.26.0 → cbrkit-0.26.1}/src/cbrkit/sim/wrappers.py RENAMED Viewed

@@ -1,10 +1,11 @@
 from collections import defaultdict
 from collections.abc import Callable, Mapping, MutableMapping, Sequence
-from dataclasses import dataclass, field
+from dataclasses import InitVar, dataclass, field
 from typing import Any, cast, override
 from ..helpers import batchify_sim, get_metadata, get_value, getitem_or_getattr
 from ..typing import (
+    AggregatorFunc,
     AnySimFunc,
     BatchSimFunc,
     ConversionFunc,
@@ -14,6 +15,7 @@ from ..typing import (
     SimSeq,
     StructuredValue,
 )
+from .aggregator import default_aggregator
 from .generic import static
@@ -59,6 +61,67 @@ def transpose_value[V, S: Float](
     return transpose(func, get_value)
+@dataclass(slots=True)
+class combine[V, S: Float](BatchSimFunc[V, float]):
+    """Combines multiple similarity functions into one.
+    Args:
+        sim_funcs: A list of similarity functions to be combined.
+        aggregator: A function to aggregate the results from the similarity functions.
+    Returns:
+        A similarity function that combines the results from multiple similarity functions.
+    """
+    sim_funcs: InitVar[Sequence[AnySimFunc[V, S]] | Mapping[str, AnySimFunc[V, S]]]
+    aggregator: AggregatorFunc[str, S] = default_aggregator
+    batch_sim_funcs: Sequence[BatchSimFunc[V, S]] | Mapping[str, BatchSimFunc[V, S]] = (
+        field(init=False, repr=False)
+    )
+    def __post_init__(
+        self, sim_funcs: Sequence[AnySimFunc[V, S]] | Mapping[str, AnySimFunc[V, S]]
+    ):
+        if isinstance(sim_funcs, Sequence):
+            self.batch_sim_funcs = [batchify_sim(func) for func in sim_funcs]
+        elif isinstance(sim_funcs, Mapping):
+            self.batch_sim_funcs = {
+                key: batchify_sim(func) for key, func in sim_funcs.items()
+            }
+        else:
+            raise ValueError(f"Invalid sim_funcs type: {type(sim_funcs)}")
+    @override
+    def __call__(self, batches: Sequence[tuple[V, V]]) -> Sequence[float]:
+        if isinstance(self.batch_sim_funcs, Sequence):
+            func_results = [func(batches) for func in self.batch_sim_funcs]
+            return [
+                self.aggregator(
+                    [batch_results[batch_idx] for batch_results in func_results]
+                )
+                for batch_idx in range(len(batches))
+            ]
+        elif isinstance(self.batch_sim_funcs, Mapping):
+            func_results = {
+                func_key: func(batches)
+                for func_key, func in self.batch_sim_funcs.items()
+            }
+            return [
+                self.aggregator(
+                    {
+                        func_key: batch_results[batch_idx]
+                        for func_key, batch_results in func_results.items()
+                    }
+                )
+                for batch_idx in range(len(batches))
+            ]
+        raise ValueError(f"Invalid batch_sim_funcs type: {type(self.batch_sim_funcs)}")
 @dataclass(slots=True)
 class cache[V, U, S: Float](BatchSimFunc[V, S]):
     similarity_func: BatchSimFunc[V, S]