cbrkit 0.28.2__tar.gz → 0.28.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {cbrkit-0.28.2 → cbrkit-0.28.3}/PKG-INFO +1 -1
  2. {cbrkit-0.28.2 → cbrkit-0.28.3}/pyproject.toml +1 -1
  3. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/eval/__init__.py +8 -1
  4. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/eval/common.py +48 -3
  5. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/eval/retrieval.py +10 -29
  6. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/embed.py +1 -1
  7. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/system.py +41 -50
  8. {cbrkit-0.28.2 → cbrkit-0.28.3}/README.md +0 -0
  9. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/__init__.py +0 -0
  10. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/__main__.py +0 -0
  11. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/adapt/__init__.py +0 -0
  12. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/adapt/attribute_value.py +0 -0
  13. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/adapt/generic.py +0 -0
  14. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/adapt/numbers.py +0 -0
  15. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/adapt/strings.py +0 -0
  16. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/api.py +0 -0
  17. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/cli.py +0 -0
  18. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/constants.py +0 -0
  19. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/cycle.py +0 -0
  20. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/dumpers.py +0 -0
  21. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/helpers.py +0 -0
  22. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/loaders.py +0 -0
  23. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/model/__init__.py +0 -0
  24. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/model/graph.py +0 -0
  25. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/model/result.py +0 -0
  26. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/py.typed +0 -0
  27. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/retrieval/__init__.py +0 -0
  28. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/retrieval/apply.py +0 -0
  29. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/retrieval/build.py +0 -0
  30. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/retrieval/rerank.py +0 -0
  31. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/reuse/__init__.py +0 -0
  32. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/reuse/apply.py +0 -0
  33. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/reuse/build.py +0 -0
  34. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/__init__.py +0 -0
  35. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/aggregator.py +0 -0
  36. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/attribute_value.py +0 -0
  37. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/collections.py +0 -0
  38. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/generic.py +0 -0
  39. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/__init__.py +0 -0
  40. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/alignment.py +0 -0
  41. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/astar.py +0 -0
  42. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/brute_force.py +0 -0
  43. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/common.py +0 -0
  44. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/dfs.py +0 -0
  45. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/greedy.py +0 -0
  46. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/lap.py +0 -0
  47. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/precompute.py +0 -0
  48. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/qap.py +0 -0
  49. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/graphs/vf2.py +0 -0
  50. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/numbers.py +0 -0
  51. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/pooling.py +0 -0
  52. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/strings.py +0 -0
  53. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/taxonomy.py +0 -0
  54. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/sim/wrappers.py +0 -0
  55. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/__init__.py +0 -0
  56. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/apply.py +0 -0
  57. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/build.py +0 -0
  58. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/model.py +0 -0
  59. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/prompts.py +0 -0
  60. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/__init__.py +0 -0
  61. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/anthropic.py +0 -0
  62. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/cohere.py +0 -0
  63. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/google.py +0 -0
  64. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/instructor.py +0 -0
  65. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/model.py +0 -0
  66. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/ollama.py +0 -0
  67. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/openai.py +0 -0
  68. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/openai_agents.py +0 -0
  69. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/pydantic_ai.py +0 -0
  70. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/synthesis/providers/wrappers.py +0 -0
  71. {cbrkit-0.28.2 → cbrkit-0.28.3}/src/cbrkit/typing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cbrkit
3
- Version: 0.28.2
3
+ Version: 0.28.3
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
5
5
  Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
6
6
  Author: Mirko Lenz
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cbrkit"
3
- version = "0.28.2"
3
+ version = "0.28.3"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
5
5
  authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
6
6
  readme = "README.md"
@@ -4,7 +4,13 @@ Please refer to the official documentation for more information on the available
4
4
  <https://amenra.github.io/ranx/metrics/>
5
5
  """
6
6
 
7
- from .common import compute, compute_score_metrics, generate_metrics, parse_metric
7
+ from .common import (
8
+ compute,
9
+ compute_score_metrics,
10
+ generate_metrics,
11
+ parse_metric,
12
+ similarities_to_qrels,
13
+ )
8
14
  from .retrieval import (
9
15
  retrieval,
10
16
  retrieval_step,
@@ -17,6 +23,7 @@ __all__ = [
17
23
  "generate_metrics",
18
24
  "parse_metric",
19
25
  "compute_score_metrics",
26
+ "similarities_to_qrels",
20
27
  "retrieval",
21
28
  "retrieval_step",
22
29
  "retrieval_step_to_qrels",
@@ -2,9 +2,16 @@ import itertools
2
2
  import statistics
3
3
  import warnings
4
4
  from collections.abc import Callable, Iterable, Mapping, Sequence
5
- from typing import Any, cast
6
-
7
- from ..helpers import get_logger, unpack_float, unpack_floats
5
+ from typing import Any, Literal, cast
6
+
7
+ from ..helpers import (
8
+ get_logger,
9
+ normalize_and_scale,
10
+ round,
11
+ sim_map2ranking,
12
+ unpack_float,
13
+ unpack_floats,
14
+ )
8
15
  from ..typing import ConversionFunc, EvalMetricFunc, Float, QueryCaseMatrix
9
16
 
10
17
  logger = get_logger(__name__)
@@ -388,3 +395,41 @@ def generate_metrics(
388
395
  generate_metric(*args)
389
396
  for args in itertools.product(metrics, ks, relevance_levels)
390
397
  ]
398
+
399
+
400
+ def similarities_to_qrels[Q, C](
401
+ similarities: QueryCaseMatrix[Q, C, float],
402
+ max_qrel: int | None = None,
403
+ min_qrel: int = 1,
404
+ round_mode: Literal["floor", "ceil", "nearest"] = "nearest",
405
+ auto_scale: bool = True,
406
+ ) -> QueryCaseMatrix[Q, C, int]:
407
+ if max_qrel is None:
408
+ return {
409
+ query: {
410
+ case: rank
411
+ for rank, case in enumerate(
412
+ reversed(sim_map2ranking(case_sims)),
413
+ start=min_qrel,
414
+ )
415
+ }
416
+ for query, case_sims in similarities.items()
417
+ }
418
+
419
+ if auto_scale:
420
+ min_sim = min(min(entries.values()) for entries in similarities.values())
421
+ max_sim = max(max(entries.values()) for entries in similarities.values())
422
+ else:
423
+ min_sim = 0.0
424
+ max_sim = 1.0
425
+
426
+ return {
427
+ query: {
428
+ case: round(
429
+ normalize_and_scale(sim, min_sim, max_sim, min_qrel, max_qrel),
430
+ round_mode,
431
+ )
432
+ for case, sim in case_sims.items()
433
+ }
434
+ for query, case_sims in similarities.items()
435
+ }
@@ -1,10 +1,10 @@
1
1
  from collections.abc import Sequence
2
2
  from typing import Any, Literal
3
3
 
4
- from ..helpers import normalize_and_scale, round, unpack_float
4
+ from ..helpers import unpack_float
5
5
  from ..retrieval import Result, ResultStep
6
6
  from ..typing import EvalMetricFunc, Float, QueryCaseMatrix
7
- from .common import DEFAULT_METRICS, compute
7
+ from .common import DEFAULT_METRICS, compute, similarities_to_qrels
8
8
 
9
9
 
10
10
  def retrieval_step[Q, C, S: Float](
@@ -45,36 +45,17 @@ def retrieval_step_to_qrels[Q, C, S: Float](
45
45
  round_mode: Literal["floor", "ceil", "nearest"] = "nearest",
46
46
  auto_scale: bool = True,
47
47
  ) -> QueryCaseMatrix[Q, C, int]:
48
- if max_qrel is None:
49
- return {
50
- query: {
51
- case: rank
52
- for rank, case in enumerate(reversed(entry.ranking), start=min_qrel)
53
- }
54
- for query, entry in result.queries.items()
55
- }
56
-
57
- sims = {
48
+ unpacked_sims = {
58
49
  query: {case: unpack_float(value) for case, value in entry.similarities.items()}
59
50
  for query, entry in result.queries.items()
60
51
  }
61
- if auto_scale:
62
- min_sim = min(min(entries.values()) for entries in sims.values())
63
- max_sim = max(max(entries.values()) for entries in sims.values())
64
- else:
65
- min_sim = 0.0
66
- max_sim = 1.0
67
-
68
- return {
69
- query: {
70
- case: round(
71
- normalize_and_scale(sim, min_sim, max_sim, min_qrel, max_qrel),
72
- round_mode,
73
- )
74
- for case, sim in entry.items()
75
- }
76
- for query, entry in sims.items()
77
- }
52
+ return similarities_to_qrels(
53
+ unpacked_sims,
54
+ max_qrel,
55
+ min_qrel,
56
+ round_mode,
57
+ auto_scale,
58
+ )
78
59
 
79
60
 
80
61
  def retrieval_to_qrels[Q, C, S: Float](
@@ -197,7 +197,7 @@ class cache(BatchConversionFunc[str, NumpyArray]):
197
197
  id INTEGER PRIMARY KEY AUTOINCREMENT,
198
198
  text TEXT NOT NULL UNIQUE,
199
199
  vector BLOB NOT NULL
200
- )
200
+ ) STRICT
201
201
  """)
202
202
  con.commit()
203
203
 
@@ -1,11 +1,10 @@
1
- from collections.abc import Callable, Mapping, Sequence
2
- from dataclasses import dataclass, field
1
+ from collections.abc import Callable
2
+ from dataclasses import dataclass
3
3
 
4
4
  from pydantic import BaseModel
5
5
  from typing_extensions import Any
6
6
 
7
7
  import cbrkit
8
- from cbrkit.helpers import produce_sequence
9
8
  from cbrkit.typing import Float, MaybeSequence
10
9
 
11
10
  __all__ = [
@@ -17,76 +16,76 @@ __all__ = [
17
16
 
18
17
 
19
18
  @dataclass(slots=True, frozen=True)
20
- class System[K: str | int, V: BaseModel, S: Float, P: str]:
19
+ class System[
20
+ K: str | int,
21
+ V: BaseModel,
22
+ S: Float,
23
+ R2: BaseModel | None,
24
+ R1: BaseModel | None,
25
+ ]:
21
26
  casebase: cbrkit.typing.Casebase[K, V]
22
- retriever_pipelines: Mapping[
23
- P, MaybeSequence[cbrkit.typing.RetrieverFunc[K, V, S]]
24
- ] = field(default_factory=dict)
25
- reuser_pipelines: Mapping[P, MaybeSequence[cbrkit.typing.ReuserFunc[K, V, S]]] = (
26
- field(default_factory=dict)
27
- )
28
-
29
- def get_retriever_pipeline(
30
- self, name: P, limit: int | None
31
- ) -> Sequence[cbrkit.typing.RetrieverFunc[K, V, S]]:
32
- retrievers = produce_sequence(self.retriever_pipelines[name])
33
-
34
- if limit is not None:
35
- *head_retrievers, tail_retriever = retrievers
36
- retrievers = head_retrievers + [
37
- cbrkit.retrieval.dropout(tail_retriever, limit=limit)
38
- ]
39
-
40
- return retrievers
27
+ retriever_factory: (
28
+ Callable[[R1], MaybeSequence[cbrkit.typing.RetrieverFunc[K, V, S]]] | None
29
+ ) = None
30
+ reuser_factory: (
31
+ Callable[[R2], MaybeSequence[cbrkit.typing.ReuserFunc[K, V, S]]] | None
32
+ ) = None
41
33
 
42
34
  def retrieve(
43
35
  self,
44
36
  query: V,
45
- retriever_pipeline: P,
46
- limit: int | None = None,
37
+ parameters: R1,
47
38
  ) -> cbrkit.retrieval.QueryResultStep[K, V, S]:
39
+ if not self.retriever_factory:
40
+ raise ValueError("Retriever factory is not defined.")
41
+
48
42
  return cbrkit.retrieval.apply_query(
49
43
  self.casebase,
50
44
  query,
51
- self.get_retriever_pipeline(retriever_pipeline, limit),
45
+ self.retriever_factory(parameters),
52
46
  ).default_query
53
47
 
54
48
  def reuse(
55
49
  self,
56
50
  query: V,
57
- reuser_pipeline: P,
51
+ parameters: R2,
58
52
  ) -> cbrkit.retrieval.QueryResultStep[K, V, S]:
53
+ if not self.reuser_factory:
54
+ raise ValueError("Reuser factory is not defined.")
55
+
59
56
  return cbrkit.reuse.apply_query(
60
57
  self.casebase,
61
58
  query,
62
- self.reuser_pipelines[reuser_pipeline],
59
+ self.reuser_factory(parameters),
63
60
  ).default_query
64
61
 
65
62
  def cycle(
66
63
  self,
67
64
  query: V,
68
- retriever_pipeline: P,
69
- reuser_pipeline: P,
70
- limit: int | None = None,
65
+ retrieve_parameters: R1,
66
+ reuse_parameters: R2,
71
67
  ) -> cbrkit.retrieval.QueryResultStep[K, V, S]:
68
+ if not self.retriever_factory or not self.reuser_factory:
69
+ raise ValueError("Retriever or reuser factory is not defined.")
70
+
72
71
  return cbrkit.cycle.apply_query(
73
72
  self.casebase,
74
73
  query,
75
- self.get_retriever_pipeline(retriever_pipeline, limit),
76
- self.reuser_pipelines[reuser_pipeline],
74
+ self.retriever_factory(retrieve_parameters),
75
+ self.reuser_factory(reuse_parameters),
77
76
  ).final_step.default_query
78
77
 
79
78
  @property
80
79
  def tools(self) -> list[Callable[..., Any]]:
81
80
  res: list[Callable[..., Any]] = []
82
81
 
83
- if self.retriever_pipelines:
82
+ if self.retriever_factory:
84
83
  res.append(self.retrieve)
85
84
 
86
- if self.reuser_pipelines:
85
+ if self.reuser_factory:
87
86
  res.append(self.reuse)
88
87
 
89
- if self.retriever_pipelines and self.reuser_pipelines:
88
+ if self.retriever_factory and self.reuser_factory:
90
89
  res.append(self.cycle)
91
90
 
92
91
  return res
@@ -94,18 +93,10 @@ class System[K: str | int, V: BaseModel, S: Float, P: str]:
94
93
  def get_case(self, name: K) -> V:
95
94
  return self.casebase[name]
96
95
 
97
- def get_retriever_names(self) -> list[str]:
98
- return list(self.retriever_pipelines.keys())
99
-
100
- def get_reuser_names(self) -> list[str]:
101
- return list(self.reuser_pipelines.keys())
102
-
103
96
  @property
104
97
  def resources(self) -> dict[str, Callable[..., Any]]:
105
98
  return {
106
- "casebase/{name}": self.get_case,
107
- "pipelines/retrieve": self.get_retriever_names,
108
- "pipelines/reuse": self.get_reuser_names,
99
+ "casebase://{name}": self.get_case,
109
100
  }
110
101
 
111
102
  @property
@@ -114,14 +105,14 @@ class System[K: str | int, V: BaseModel, S: Float, P: str]:
114
105
 
115
106
 
116
107
  with cbrkit.helpers.optional_dependencies():
117
- from fastapi import FastAPI
108
+ from fastapi import APIRouter, FastAPI
118
109
 
119
- def to_fastapi(system: System, app: FastAPI) -> FastAPI:
110
+ def to_fastapi[T: APIRouter | FastAPI](system: System, app: T) -> T:
120
111
  for value in system.tools:
121
112
  app.post(f"/tool/{value.__name__}")(value)
122
113
 
123
114
  for key, value in system.resources.items():
124
- app.get(f"/resource/{key}")(value)
115
+ app.get(f"/resource/{key.replace('://', '/')}")(value)
125
116
 
126
117
  for value in system.prompts:
127
118
  app.post(f"/prompt/{value.__name__}")(value)
@@ -132,12 +123,12 @@ with cbrkit.helpers.optional_dependencies():
132
123
  with cbrkit.helpers.optional_dependencies():
133
124
  from fastmcp import FastMCP
134
125
 
135
- def to_fastmcp[T](system: System, app: FastMCP[T]) -> FastMCP[T]:
126
+ def to_fastmcp[T: FastMCP](system: System, app: T) -> T:
136
127
  for value in system.tools:
137
128
  app.tool(value)
138
129
 
139
130
  for key, value in system.resources.items():
140
- app.resource(f"cbrkit://{key}")(value)
131
+ app.resource(key)(value)
141
132
 
142
133
  for value in system.prompts:
143
134
  app.prompt(value)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes