cbrkit 0.26.3__tar.gz → 0.26.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {cbrkit-0.26.3 → cbrkit-0.26.5}/PKG-INFO +1 -1
  2. {cbrkit-0.26.3 → cbrkit-0.26.5}/pyproject.toml +1 -1
  3. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/eval/common.py +1 -1
  4. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/eval/retrieval.py +2 -4
  5. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/helpers.py +26 -0
  6. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/astar.py +44 -52
  7. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/common.py +42 -5
  8. {cbrkit-0.26.3 → cbrkit-0.26.5}/README.md +0 -0
  9. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/__init__.py +0 -0
  10. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/__main__.py +0 -0
  11. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/adapt/__init__.py +0 -0
  12. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/adapt/attribute_value.py +0 -0
  13. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/adapt/generic.py +0 -0
  14. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/adapt/numbers.py +0 -0
  15. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/adapt/strings.py +0 -0
  16. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/api.py +0 -0
  17. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/cli.py +0 -0
  18. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/constants.py +0 -0
  19. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/cycle.py +0 -0
  20. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/dumpers.py +0 -0
  21. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/eval/__init__.py +0 -0
  22. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/loaders.py +0 -0
  23. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/model/__init__.py +0 -0
  24. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/model/graph.py +0 -0
  25. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/model/result.py +0 -0
  26. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/py.typed +0 -0
  27. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/retrieval/__init__.py +0 -0
  28. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/retrieval/apply.py +0 -0
  29. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/retrieval/build.py +0 -0
  30. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/retrieval/rerank.py +0 -0
  31. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/reuse/__init__.py +0 -0
  32. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/reuse/apply.py +0 -0
  33. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/reuse/build.py +0 -0
  34. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/__init__.py +0 -0
  35. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/aggregator.py +0 -0
  36. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/attribute_value.py +0 -0
  37. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/collections.py +0 -0
  38. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/embed.py +0 -0
  39. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/generic.py +0 -0
  40. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/__init__.py +0 -0
  41. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/alignment.py +0 -0
  42. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/brute_force.py +0 -0
  43. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/dfs.py +0 -0
  44. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/greedy.py +0 -0
  45. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/lap.py +0 -0
  46. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/precompute.py +0 -0
  47. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/qap.py +0 -0
  48. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/graphs/vf2.py +0 -0
  49. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/numbers.py +0 -0
  50. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/strings.py +0 -0
  51. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/taxonomy.py +0 -0
  52. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/sim/wrappers.py +0 -0
  53. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/__init__.py +0 -0
  54. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/apply.py +0 -0
  55. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/build.py +0 -0
  56. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/model.py +0 -0
  57. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/prompts.py +0 -0
  58. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/__init__.py +0 -0
  59. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/anthropic.py +0 -0
  60. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/cohere.py +0 -0
  61. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/google.py +0 -0
  62. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/instructor.py +0 -0
  63. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/model.py +0 -0
  64. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/ollama.py +0 -0
  65. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/openai.py +0 -0
  66. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
  67. {cbrkit-0.26.3 → cbrkit-0.26.5}/src/cbrkit/typing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cbrkit
3
- Version: 0.26.3
3
+ Version: 0.26.5
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
5
5
  Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
6
6
  Author: Mirko Lenz
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cbrkit"
3
- version = "0.26.3"
3
+ version = "0.26.5"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
5
5
  authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
6
6
  readme = "README.md"
@@ -244,7 +244,7 @@ def kendall_tau(
244
244
  qrel_relevant = {k for k, v in qrels[key].items() if v >= relevance_level}
245
245
  sorted_qrel_relevant = sorted(qrel_relevant, key=lambda x: qrels[key][x])
246
246
 
247
- sorted_run = sorted(run.keys(), key=lambda x: run[key][x], reverse=True)
247
+ sorted_run = sorted(run[key].keys(), key=lambda x: run[key][x], reverse=True)
248
248
  run_k = sorted_run[: k if k > 0 else len(sorted_run)]
249
249
 
250
250
  max_idx = min(len(run_k), len(sorted_qrel_relevant))
@@ -1,7 +1,7 @@
1
1
  from collections.abc import Sequence
2
2
  from typing import Any, Literal
3
3
 
4
- from ..helpers import round, scale, unpack_float
4
+ from ..helpers import normalize_and_scale, round, unpack_float
5
5
  from ..retrieval import Result, ResultStep
6
6
  from ..typing import EvalMetricFunc, Float, QueryCaseMatrix
7
7
  from .common import DEFAULT_METRICS, compute
@@ -65,12 +65,10 @@ def retrieval_step_to_qrels[Q, C, S: Float](
65
65
  min_sim = 0.0
66
66
  max_sim = 1.0
67
67
 
68
- qrel_factor = max_qrel - min_qrel
69
-
70
68
  return {
71
69
  query: {
72
70
  case: round(
73
- scale(sim, min_sim, max_sim) * qrel_factor + min_qrel,
71
+ normalize_and_scale(sim, min_sim, max_sim, min_qrel, max_qrel),
74
72
  round_mode,
75
73
  )
76
74
  for case, sim in entry.items()
@@ -71,6 +71,8 @@ __all__ = [
71
71
  "load_callables_map",
72
72
  "load_callables",
73
73
  "load_object",
74
+ "normalize",
75
+ "normalize_and_scale",
74
76
  "log_batch",
75
77
  "mp_count",
76
78
  "mp_map",
@@ -605,6 +607,30 @@ def scale(value: float, lower: float, upper: float) -> float:
605
607
  return value * (upper - lower) + lower
606
608
 
607
609
 
610
+ def normalize(value: float, value_min: float, value_max: float) -> float:
611
+ """Normalize a value from [value_min, value_max] to [0, 1]."""
612
+ if value_max == value_min:
613
+ # Handle edge case where all values are identical
614
+ return 0.0
615
+
616
+ return (value - value_min) / (value_max - value_min)
617
+
618
+
619
+ def normalize_and_scale(
620
+ value: float,
621
+ value_min: float,
622
+ value_max: float,
623
+ target_min: float,
624
+ target_max: float,
625
+ ) -> float:
626
+ """Normalize a value from [value_min, value_max] to [target_min, target_max]."""
627
+ # First normalize to [0, 1]
628
+ normalized = normalize(value, value_min, value_max)
629
+
630
+ # Then scale to target range
631
+ return scale(normalized, target_min, target_max)
632
+
633
+
608
634
  def load_object(import_name: str) -> Any:
609
635
  """Import an object based on a string.
610
636
 
@@ -13,11 +13,7 @@ from ...model.graph import (
13
13
  Node,
14
14
  )
15
15
  from ...typing import SimFunc
16
- from .common import (
17
- GraphSim,
18
- SearchGraphSimFunc,
19
- SearchState,
20
- )
16
+ from .common import GraphSim, SearchGraphSimFunc, SearchState, next_elem, sorted_iter
21
17
 
22
18
  __all__ = [
23
19
  "HeuristicFunc",
@@ -157,15 +153,11 @@ class select1[K, N, E, G](SelectionFunc[K, N, E, G]):
157
153
  ) -> None | tuple[K, GraphElementType]:
158
154
  """Select the next node or edge to be mapped"""
159
155
 
160
- try:
161
- return next(iter(s.open_y_nodes)), "node"
162
- except StopIteration:
163
- pass
156
+ if s.open_y_nodes:
157
+ return next_elem(s.open_y_nodes), "node"
164
158
 
165
- try:
166
- return next(iter(s.open_y_edges)), "edge"
167
- except StopIteration:
168
- pass
159
+ if s.open_y_edges:
160
+ return next_elem(s.open_y_edges), "edge"
169
161
 
170
162
  return None
171
163
 
@@ -182,20 +174,18 @@ class select2[K, N, E, G](SelectionFunc[K, N, E, G]):
182
174
  ) -> None | tuple[K, GraphElementType]:
183
175
  """Select the next node or edge to be mapped"""
184
176
 
185
- try:
186
- return next(
187
- key
188
- for key in s.open_y_edges
189
- if y.edges[key].source.key not in s.open_y_nodes
190
- and y.edges[key].target.key not in s.open_y_nodes
191
- ), "edge"
192
- except StopIteration:
193
- pass
194
-
195
- try:
196
- return next(iter(s.open_y_nodes)), "node"
197
- except StopIteration:
198
- pass
177
+ edge_candidates = {
178
+ key
179
+ for key in sorted_iter(s.open_y_edges)
180
+ if y.edges[key].source.key not in s.open_y_nodes
181
+ and y.edges[key].target.key not in s.open_y_nodes
182
+ }
183
+
184
+ if edge_candidates:
185
+ return next_elem(edge_candidates), "edge"
186
+
187
+ if s.open_y_nodes:
188
+ return next_elem(s.open_y_nodes), "node"
199
189
 
200
190
  return None
201
191
 
@@ -246,21 +236,33 @@ class select3[K, N, E, G](SelectionFunc[K, N, E, G]):
246
236
 
247
237
  if not heuristic_scores:
248
238
  # Fallback: select any remaining node or edge for null mapping
239
+ # Use sorted to ensure deterministic selection
249
240
  if s.open_y_nodes:
250
- return next(iter(s.open_y_nodes)), "node"
241
+ return next_elem(s.open_y_nodes), "node"
251
242
  elif s.open_y_edges:
252
- return next(iter(s.open_y_edges)), "edge"
243
+ return next_elem(s.open_y_edges), "edge"
253
244
  return None
254
245
 
246
+ # Find the maximum heuristic score
255
247
  max_score = max(heuristic_scores.values())
256
- best_selections = [
248
+ best_selections = {
257
249
  key for key, value in heuristic_scores.items() if value == max_score
258
- ]
250
+ }
259
251
 
260
252
  # if multiple selections have the same score, select the one with the lowest number of possible mappings
261
- best_selection = min(best_selections, key=lambda key: mapping_options[key])
262
-
263
- selection_key, selection_type = best_selection
253
+ if len(best_selections) > 1:
254
+ min_mapping_options = min(mapping_options[key] for key in best_selections)
255
+ best_selections = {
256
+ key
257
+ for key in best_selections
258
+ if mapping_options[key] == min_mapping_options
259
+ }
260
+
261
+ # select the one with the lowest key
262
+ selection_key, selection_type = next_elem(
263
+ best_selections,
264
+ key=lambda item: item[0],
265
+ )
264
266
 
265
267
  if selection_type == "edge":
266
268
  edge = y.edges[selection_key]
@@ -290,7 +292,7 @@ class build[K, N, E, G](
290
292
  beam_width: Limits the queue size which prunes the search space.
291
293
  This leads to a faster search and less memory usage but also introduces a similarity error.
292
294
  Disabled by default. Based on [Neuhaus et al. (2006)](https://doi.org/10.1007/11815921_17).
293
- pathlength_weight: Add a penalty for states with few mapped elements that already have a low similarity.
295
+ pathlength_weight: Favor long partial edit paths over shorter ones.
294
296
  Disabled by default. Based on [Neuhaus et al. (2006)](https://doi.org/10.1007/11815921_17).
295
297
 
296
298
  Returns:
@@ -356,22 +358,11 @@ class build[K, N, E, G](
356
358
  prio = 1 - (past_sim + future_sim)
357
359
 
358
360
  if self.pathlength_weight > 0:
359
- node_null_mapping = (
360
- set(y.nodes.keys())
361
- - set(state.node_mapping.keys())
362
- - set(state.open_y_nodes)
363
- )
364
- edge_null_mapping = (
365
- set(y.edges.keys())
366
- - set(state.edge_mapping.keys())
367
- - set(state.open_y_edges)
368
- )
369
- num_paths = (
370
- len(state.node_mapping)
371
- + len(state.edge_mapping)
372
- + len(node_null_mapping)
373
- + len(edge_null_mapping)
374
- )
361
+ # Calculate the number of mapping decisions made so far (partial edit path length)
362
+ # This includes actual mappings plus null mappings (elements processed but not mapped)
363
+ total_y_elements = len(y.nodes) + len(y.edges)
364
+ open_y_elements = len(state.open_y_nodes) + len(state.open_y_edges)
365
+ num_paths = total_y_elements - open_y_elements
375
366
  return prio / (self.pathlength_weight**num_paths)
376
367
 
377
368
  return prio
@@ -432,7 +423,8 @@ class build[K, N, E, G](
432
423
  heapq.heappush(open_set, PriorityState(next_prio, next_state))
433
424
 
434
425
  if self.beam_width > 0 and len(open_set) > self.beam_width:
435
- open_set = open_set[: self.beam_width]
426
+ open_set = heapq.nsmallest(self.beam_width, open_set)
427
+ heapq.heapify(open_set)
436
428
 
437
429
  return self.similarity(
438
430
  x,
@@ -1,6 +1,6 @@
1
1
  import itertools
2
2
  from collections import defaultdict
3
- from collections.abc import Mapping, Sequence
3
+ from collections.abc import Callable, Collection, Iterable, Mapping, Sequence
4
4
  from dataclasses import dataclass, field
5
5
  from typing import Any, Protocol, cast
6
6
 
@@ -247,6 +247,44 @@ class SearchState[K]:
247
247
  open_x_edges: frozenset[K]
248
248
 
249
249
 
250
+ def sorted_iter[K](iterable: Iterable[K]) -> Iterable[K]:
251
+ """Sort an iterable if possible, otherwise return it unchanged."""
252
+ try:
253
+ return sorted(cast(Iterable[Any], iterable))
254
+ except TypeError:
255
+ return iterable
256
+
257
+
258
+ def next_elem[K](
259
+ elements: Collection[K],
260
+ key: Callable[[K], Any] | None = None,
261
+ ) -> K:
262
+ """Select the next element from a set deterministically.
263
+
264
+ If elements are sortable, returns the smallest one.
265
+ Otherwise, returns the first element from iteration.
266
+
267
+ Args:
268
+ elements: Set of elements to choose from
269
+
270
+ Returns:
271
+ A single element from the set
272
+
273
+ Raises:
274
+ ValueError: If the set is empty
275
+ """
276
+ if not elements:
277
+ raise ValueError("Cannot select from empty set")
278
+
279
+ if len(elements) == 1:
280
+ return next(iter(elements))
281
+
282
+ try:
283
+ return min(cast(Iterable[Any], elements), key=key)
284
+ except TypeError:
285
+ return next(iter(elements))
286
+
287
+
250
288
  class SearchStateInit[K, N, E, G](Protocol):
251
289
  def __call__(
252
290
  self,
@@ -413,7 +451,7 @@ class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
413
451
  state.open_x_nodes - {x_key},
414
452
  state.open_x_edges,
415
453
  )
416
- for x_key in state.open_x_nodes
454
+ for x_key in sorted_iter(state.open_x_nodes)
417
455
  if self.legal_node_mapping(x, y, state, x_key, y_key)
418
456
  ]
419
457
 
@@ -447,7 +485,7 @@ class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
447
485
  state.open_x_nodes,
448
486
  state.open_x_edges - {x_key},
449
487
  )
450
- for x_key in state.open_x_edges
488
+ for x_key in sorted_iter(state.open_x_edges)
451
489
  if self.legal_edge_mapping(x, y, state, x_key, y_key)
452
490
  ]
453
491
 
@@ -473,10 +511,9 @@ class SearchGraphSimFunc[K, N, E, G](BaseGraphSimFunc[K, N, E, G]):
473
511
  y_key: K,
474
512
  ) -> list[SearchState[K]]:
475
513
  """Expand a given edge and map its source/target node if not already mapped"""
476
-
477
514
  next_states: list[SearchState[K]] = []
478
515
 
479
- for x_key in state.open_x_edges:
516
+ for x_key in sorted_iter(state.open_x_edges):
480
517
  next_state = state
481
518
  x_source_key = x.edges[x_key].source.key
482
519
  x_target_key = x.edges[x_key].target.key
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes