PyPI - cbrkit - Version diff - 0.26.3.tar.gz → 0.26.5.tar.gz - Mend

cbrkit 0.26.3.tar.gz → 0.26.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

{cbrkit-0.26.3 → cbrkit-0.26.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: cbrkit
-Version: 0.26.3
+Version: 0.26.5
 Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
 Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
 Author: Mirko Lenz

{cbrkit-0.26.3 → cbrkit-0.26.5}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "cbrkit"
-version = "0.26.3"
+version = "0.26.5"
 description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
 authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
 readme = "README.md"

{cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/eval/common.py RENAMED Viewed

@@ -244,7 +244,7 @@ def kendall_tau(
         qrel_relevant = {k for k, v in qrels[key].items() if v >= relevance_level}
         sorted_qrel_relevant = sorted(qrel_relevant, key=lambda x: qrels[key][x])
-        sorted_run = sorted(run.keys(), key=lambda x: run[key][x], reverse=True)
+        sorted_run = sorted(run[key].keys(), key=lambda x: run[key][x], reverse=True)
         run_k = sorted_run[: k if k > 0 else len(sorted_run)]
         max_idx = min(len(run_k), len(sorted_qrel_relevant))

{cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/eval/retrieval.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from collections.abc import Sequence
 from typing import Any, Literal
-from ..helpers import round, scale, unpack_float
+from ..helpers import normalize_and_scale, round, unpack_float
 from ..retrieval import Result, ResultStep
 from ..typing import EvalMetricFunc, Float, QueryCaseMatrix
 from .common import DEFAULT_METRICS, compute
@@ -65,12 +65,10 @@ def retrieval_step_to_qrels[Q, C, S: Float](
         min_sim = 0.0
         max_sim = 1.0
-    qrel_factor = max_qrel - min_qrel
     return {
         query: {
             case: round(
-                scale(sim, min_sim, max_sim) * qrel_factor + min_qrel,
+                normalize_and_scale(sim, min_sim, max_sim, min_qrel, max_qrel),
                 round_mode,
             )
             for case, sim in entry.items()

{cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/helpers.py RENAMED Viewed

@@ -71,6 +71,8 @@ __all__ = [
     "load_callables_map",
     "load_callables",
     "load_object",
+    "normalize",
+    "normalize_and_scale",
     "log_batch",
     "mp_count",
     "mp_map",
@@ -605,6 +607,30 @@ def scale(value: float, lower: float, upper: float) -> float:
     return value * (upper - lower) + lower
+def normalize(value: float, value_min: float, value_max: float) -> float:
+    """Normalize a value from [value_min, value_max] to [0, 1]."""
+    if value_max == value_min:
+        # Handle edge case where all values are identical
+        return 0.0
+    return (value - value_min) / (value_max - value_min)
+def normalize_and_scale(
+    value: float,
+    value_min: float,
+    value_max: float,
+    target_min: float,
+    target_max: float,
+) -> float:
+    """Normalize a value from [value_min, value_max] to [target_min, target_max]."""
+    # First normalize to [0, 1]
+    normalized = normalize(value, value_min, value_max)
+    # Then scale to target range
+    return scale(normalized, target_min, target_max)
 def load_object(import_name: str) -> Any:
     """Import an object based on a string.

{cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/astar.py RENAMED Viewed

@@ -13,11 +13,7 @@ from ...model.graph import (
     Node,
 )
 from ...typing import SimFunc
-from .common import (
-    GraphSim,
-    SearchGraphSimFunc,
-    SearchState,
-)
+from .common import GraphSim, SearchGraphSimFunc, SearchState, next_elem, sorted_iter
 __all__ = [
     "HeuristicFunc",
@@ -157,15 +153,11 @@ class select1[K, N, E, G](SelectionFunc[K, N, E, G]):
     ) -> None | tuple[K, GraphElementType]:
         """Select the next node or edge to be mapped"""
-        try:
-            return next(iter(s.open_y_nodes)), "node"
-        except StopIteration:
-            pass
+        if s.open_y_nodes:
+            return next_elem(s.open_y_nodes), "node"
-        try:
-            return next(iter(s.open_y_edges)), "edge"
-        except StopIteration:
-            pass
+        if s.open_y_edges:
+            return next_elem(s.open_y_edges), "edge"
         return None
@@ -182,20 +174,18 @@ class select2[K, N, E, G](SelectionFunc[K, N, E, G]):
     ) -> None | tuple[K, GraphElementType]:
         """Select the next node or edge to be mapped"""
-        try:
-            return next(
-                key
-                for key in s.open_y_edges
-                if y.edges[key].source.key not in s.open_y_nodes
-                and y.edges[key].target.key not in s.open_y_nodes
-            ), "edge"
-        except StopIteration:
-            pass
-        try:
-            return next(iter(s.open_y_nodes)), "node"
-        except StopIteration:
-            pass
+        edge_candidates = {
+            key
+            for key in sorted_iter(s.open_y_edges)
+            if y.edges[key].source.key not in s.open_y_nodes
+            and y.edges[key].target.key not in s.open_y_nodes
+        }
+        if edge_candidates:
+            return next_elem(edge_candidates), "edge"
+        if s.open_y_nodes:
+            return next_elem(s.open_y_nodes), "node"
         return None
@@ -246,21 +236,33 @@ class select3[K, N, E, G](SelectionFunc[K, N, E, G]):
         if not heuristic_scores:
             # Fallback: select any remaining node or edge for null mapping
+            # Use sorted to ensure deterministic selection
             if s.open_y_nodes:
-                return next(iter(s.open_y_nodes)), "node"
+                return next_elem(s.open_y_nodes), "node"
             elif s.open_y_edges:
-                return next(iter(s.open_y_edges)), "edge"
+                return next_elem(s.open_y_edges), "edge"
             return None
+        # Find the maximum heuristic score
         max_score = max(heuristic_scores.values())
-        best_selections = [
+        best_selections = {
             key for key, value in heuristic_scores.items() if value == max_score
-        ]
+        }
         # if multiple selections have the same score, select the one with the lowest number of possible mappings
-        best_selection = min(best_selections, key=lambda key: mapping_options[key])
-        selection_key, selection_type = best_selection
+        if len(best_selections) > 1:
+            min_mapping_options = min(mapping_options[key] for key in best_selections)
+            best_selections = {
+                key
+                for key in best_selections
+                if mapping_options[key] == min_mapping_options
+            }
+        # select the one with the lowest key
+        selection_key, selection_type = next_elem(
+            best_selections,
+            key=lambda item: item[0],
+        )
         if selection_type == "edge":
             edge = y.edges[selection_key]
@@ -290,7 +292,7 @@ class build[K, N, E, G](
         beam_width: Limits the queue size which prunes the search space.
             This leads to a faster search and less memory usage but also introduces a similarity error.
             Disabled by default. Based on [Neuhaus et al. (2006)](https://doi.org/10.1007/11815921_17).
-        pathlength_weight: Add a penalty for states with few mapped elements that already have a low similarity.
+        pathlength_weight: Favor long partial edit paths over shorter ones.
             Disabled by default. Based on [Neuhaus et al. (2006)](https://doi.org/10.1007/11815921_17).
     Returns:
@@ -356,22 +358,11 @@ class build[K, N, E, G](
         prio = 1 - (past_sim + future_sim)
         if self.pathlength_weight > 0:
-            node_null_mapping = (
-                set(y.nodes.keys())
-                - set(state.node_mapping.keys())
-                - set(state.open_y_nodes)
-            )
-            edge_null_mapping = (
-                set(y.edges.keys())
-                - set(state.edge_mapping.keys())
-                - set(state.open_y_edges)
-            )
-            num_paths = (
-                len(state.node_mapping)
-                + len(state.edge_mapping)
-                + len(node_null_mapping)
-                + len(edge_null_mapping)
-            )
+            # Calculate the number of mapping decisions made so far (partial edit path length)
+            # This includes actual mappings plus null mappings (elements processed but not mapped)
+            total_y_elements = len(y.nodes) + len(y.edges)
+            open_y_elements = len(state.open_y_nodes) + len(state.open_y_edges)
+            num_paths = total_y_elements - open_y_elements
             return prio / (self.pathlength_weight**num_paths)
         return prio
@@ -432,7 +423,8 @@ class build[K, N, E, G](
                 heapq.heappush(open_set, PriorityState(next_prio, next_state))
             if self.beam_width > 0 and len(open_set) > self.beam_width:
-                open_set = open_set[: self.beam_width]
+                open_set = heapq.nsmallest(self.beam_width, open_set)
+                heapq.heapify(open_set)
         return self.similarity(
             x,

{cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/common.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import itertools
 from collections import defaultdict
-from collections.abc import Mapping, Sequence
+from collections.abc import Callable, Collection, Iterable, Mapping, Sequence
 from dataclasses import dataclass, field
 from typing import Any, Protocol, cast
@@ -247,6 +247,44 @@ class SearchState[K]:
     open_x_edges: frozenset[K]
+def sorted_iter[K](iterable: Iterable[K]) -> Iterable[K]:
+    """Sort an iterable if possible, otherwise return it unchanged."""
+    try:
+        return sorted(cast(Iterable[Any], iterable))
+    except TypeError:
+        return iterable
+def next_elem[K](
+    elements: Collection[K],
+    key: Callable[[K], Any] | None = None,
+) -> K:
+    """Select the next element from a set deterministically.
+    If elements are sortable, returns the smallest one.
+    Otherwise, returns the first element from iteration.
+    Args:
+        elements: Set of elements to choose from
+    Returns:
+        A single element from the set
+    Raises:
+        ValueError: If the set is empty
+    """
+    if not elements:
+        raise ValueError("Cannot select from empty set")
+    if len(elements) == 1:
+        return next(iter(elements))
+    try:
+        return min(cast(Iterable[Any], elements), key=key)
+    except TypeError:
+        return next(iter(elements))
 class SearchStateInit[K, N, E, G](Protocol):
     def __call__(
         self,
@@ -413,7 +451,7 @@ class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
                 state.open_x_nodes - {x_key},
                 state.open_x_edges,
             )
-            for x_key in state.open_x_nodes
+            for x_key in sorted_iter(state.open_x_nodes)
             if self.legal_node_mapping(x, y, state, x_key, y_key)
         ]
@@ -447,7 +485,7 @@ class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
                 state.open_x_nodes,
                 state.open_x_edges - {x_key},
             )
-            for x_key in state.open_x_edges
+            for x_key in sorted_iter(state.open_x_edges)
             if self.legal_edge_mapping(x, y, state, x_key, y_key)
         ]
@@ -473,10 +511,9 @@ class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
         y_key: K,
     ) -> list[SearchState[K]]:
         """Expand a given edge and map its source/target node if not already mapped"""
         next_states: list[SearchState[K]] = []
-        for x_key in state.open_x_edges:
+        for x_key in sorted_iter(state.open_x_edges):
             next_state = state
             x_source_key = x.edges[x_key].source.key
             x_target_key = x.edges[x_key].target.key