cbrkit 0.6.0__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cbrkit
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
5
5
  Home-page: https://wi2trier.github.io/cbrkit/
6
6
  License: MIT
@@ -41,9 +41,9 @@ Requires-Dist: sentence-transformers (>=2.2,<3.0) ; extra == "all" or extra == "
41
41
  Requires-Dist: spacy (>=3.7,<4.0) ; extra == "all" or extra == "all" or extra == "nlp"
42
42
  Requires-Dist: torch (>=2.1.1,<3.0.0) ; extra == "all" or extra == "transformers"
43
43
  Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
44
- Requires-Dist: typer[all] (>=0.9,<0.10) ; extra == "all" or extra == "cli"
44
+ Requires-Dist: typer[all] (>=0.9,<1.0) ; extra == "all" or extra == "cli"
45
45
  Requires-Dist: uvicorn[standard] (>=0.24,<1.0) ; extra == "all" or extra == "api"
46
- Requires-Dist: xmltodict (>=0.13,<0.14)
46
+ Requires-Dist: xmltodict (>=0.13,<1.0)
47
47
  Project-URL: Repository, https://github.com/wi2trier/cbrkit
48
48
  Description-Content-Type: text/markdown
49
49
 
@@ -161,7 +161,7 @@ It is possible to define custom measures, use built-in ones, or combine both.
161
161
  In CBRkit, a similarity measure is defined as a function that takes two arguments (a case and a query) and returns a similarity score: `sim = f(x, y)`.
162
162
  It also supports pipeline-based similarity measures that are popular in NLP where a list of tuples is passed to the similarity measure: `sims = f([(x1, y1), (x2, y2), ...])`.
163
163
  This generic approach allows you to define custom similarity measures for your specific use case.
164
- For instance, you may define the following function for comparing colors:
164
+ For instance, the following function not only checks for strict equality, but also for partial matches (e.g., `x = "blue"` and `y = "light blue"`):
165
165
 
166
166
  ```python
167
167
  def color_similarity(x: str, y: str) -> float:
@@ -173,7 +173,8 @@ def color_similarity(x: str, y: str) -> float:
173
173
  return 0.0
174
174
  ```
175
175
 
176
- In addition to checking for strict equality, our function also checks for partial matches (e.g., `x = "blue"` and `y = "light blue"`).
176
+ **Please note:** CBRkit inspects the signature of custom similarity functions to perform some checks.
177
+ You need to make sure that the two parameters are named `x` and `y`, otherwise CBRkit will throw an error.
177
178
 
178
179
  ### Built-in Similarity Measures
179
180
 
@@ -207,7 +208,7 @@ For the common use case of attribute-value based data, CBRkit provides a predefi
207
208
  cbrkit.sim.attribute_value(
208
209
  attributes={
209
210
  "price": cbrkit.sim.numbers.linear(),
210
- "color": color_similarity
211
+ "color": color_similarity # custom measure
211
212
  ...
212
213
  },
213
214
  aggregator=cbrkit.sim.aggregator(pooling="mean"),
@@ -216,7 +217,8 @@ cbrkit.sim.attribute_value(
216
217
 
217
218
  The `attribute_value` function lets you define measures for each attribute of the cases/queries as well as the aggregation function.
218
219
  It also allows you to use custom measures like the `color_similarity` function defined above.
219
- **Please note:** The custom measure is not called directly but passed as a reference to the `attribute_value` function since it is not a generator function.
220
+
221
+ **Please note:** The custom measure is not executed (i.e., there are **no** parentheses at the end), but instead passed as a reference to the `attribute_value` function.
220
222
 
221
223
  You may even nest similarity functions to create measures for object-oriented cases:
222
224
 
@@ -230,7 +232,7 @@ cbrkit.sim.attribute_value(
230
232
  },
231
233
  aggregator=cbrkit.sim.aggregator(pooling="mean"),
232
234
  ),
233
- "color": color_similarity
235
+ "color": color_similarity # custom measure
234
236
  ...
235
237
  },
236
238
  aggregator=cbrkit.sim.aggregator(pooling="mean"),
@@ -268,19 +270,14 @@ In some cases, it is useful to combine multiple retrieval pipelines, for example
268
270
  To use this pattern, first create the corresponding retrievers using the builder:
269
271
 
270
272
  ```python
271
- retriever1 = cbrkit.retrieval.build(..., limit=10)
272
- # since retriever2 only receives the cases from retriever1, we do not need a limit
273
- retriever2 = cbrkit.retrieval.build(..., limit=None)
273
+ retriever1 = cbrkit.retrieval.build(..., min_similarity=0.5, limit=20)
274
+ retriever2 = cbrkit.retrieval.build(..., limit=10)
274
275
  ```
275
276
 
276
277
  Then apply all of them sequentially by passing them as a list or tuple to the `apply` function:
277
278
 
278
279
  ```python
279
- result = cbrkit.retrieval.apply(
280
- casebase,
281
- query,
282
- (retriever1, retriever2)
283
- )
280
+ result = cbrkit.retrieval.apply(casebase, query, (retriever1, retriever2))
284
281
  ```
285
282
 
286
283
  The result has the following two attributes:
@@ -112,7 +112,7 @@ It is possible to define custom measures, use built-in ones, or combine both.
112
112
  In CBRkit, a similarity measure is defined as a function that takes two arguments (a case and a query) and returns a similarity score: `sim = f(x, y)`.
113
113
  It also supports pipeline-based similarity measures that are popular in NLP where a list of tuples is passed to the similarity measure: `sims = f([(x1, y1), (x2, y2), ...])`.
114
114
  This generic approach allows you to define custom similarity measures for your specific use case.
115
- For instance, you may define the following function for comparing colors:
115
+ For instance, the following function not only checks for strict equality, but also for partial matches (e.g., `x = "blue"` and `y = "light blue"`):
116
116
 
117
117
  ```python
118
118
  def color_similarity(x: str, y: str) -> float:
@@ -124,7 +124,8 @@ def color_similarity(x: str, y: str) -> float:
124
124
  return 0.0
125
125
  ```
126
126
 
127
- In addition to checking for strict equality, our function also checks for partial matches (e.g., `x = "blue"` and `y = "light blue"`).
127
+ **Please note:** CBRkit inspects the signature of custom similarity functions to perform some checks.
128
+ You need to make sure that the two parameters are named `x` and `y`, otherwise CBRkit will throw an error.
128
129
 
129
130
  ### Built-in Similarity Measures
130
131
 
@@ -158,7 +159,7 @@ For the common use case of attribute-value based data, CBRkit provides a predefi
158
159
  cbrkit.sim.attribute_value(
159
160
  attributes={
160
161
  "price": cbrkit.sim.numbers.linear(),
161
- "color": color_similarity
162
+ "color": color_similarity # custom measure
162
163
  ...
163
164
  },
164
165
  aggregator=cbrkit.sim.aggregator(pooling="mean"),
@@ -167,7 +168,8 @@ cbrkit.sim.attribute_value(
167
168
 
168
169
  The `attribute_value` function lets you define measures for each attribute of the cases/queries as well as the aggregation function.
169
170
  It also allows you to use custom measures like the `color_similarity` function defined above.
170
- **Please note:** The custom measure is not called directly but passed as a reference to the `attribute_value` function since it is not a generator function.
171
+
172
+ **Please note:** The custom measure is not executed (i.e., there are **no** parentheses at the end), but instead passed as a reference to the `attribute_value` function.
171
173
 
172
174
  You may even nest similarity functions to create measures for object-oriented cases:
173
175
 
@@ -181,7 +183,7 @@ cbrkit.sim.attribute_value(
181
183
  },
182
184
  aggregator=cbrkit.sim.aggregator(pooling="mean"),
183
185
  ),
184
- "color": color_similarity
186
+ "color": color_similarity # custom measure
185
187
  ...
186
188
  },
187
189
  aggregator=cbrkit.sim.aggregator(pooling="mean"),
@@ -219,19 +221,14 @@ In some cases, it is useful to combine multiple retrieval pipelines, for example
219
221
  To use this pattern, first create the corresponding retrievers using the builder:
220
222
 
221
223
  ```python
222
- retriever1 = cbrkit.retrieval.build(..., limit=10)
223
- # since retriever2 only receives the cases from retriever1, we do not need a limit
224
- retriever2 = cbrkit.retrieval.build(..., limit=None)
224
+ retriever1 = cbrkit.retrieval.build(..., min_similarity=0.5, limit=20)
225
+ retriever2 = cbrkit.retrieval.build(..., limit=10)
225
226
  ```
226
227
 
227
228
  Then apply all of them sequentially by passing them as a list or tuple to the `apply` function:
228
229
 
229
230
  ```python
230
- result = cbrkit.retrieval.apply(
231
- casebase,
232
- query,
233
- (retriever1, retriever2)
234
- )
231
+ result = cbrkit.retrieval.apply(casebase, query, (retriever1, retriever2))
235
232
  ```
236
233
 
237
234
  The result has the following two attributes:
@@ -97,7 +97,12 @@ def sim2seq(
97
97
 
98
98
  return wrapped_func
99
99
 
100
- return cast(SimSeqFunc[ValueType, SimType], func)
100
+ elif len(signature.parameters) == 1:
101
+ return cast(SimSeqFunc[ValueType, SimType], func)
102
+
103
+ raise TypeError(
104
+ f"Invalid signature for similarity function: {signature.parameters}"
105
+ )
101
106
 
102
107
 
103
108
  def sim2map(
@@ -107,7 +112,13 @@ def sim2map(
107
112
  ) -> SimMapFunc[KeyType, ValueType, SimType]:
108
113
  signature = inspect_signature(func)
109
114
 
110
- if len(signature.parameters) == 2 and signature.parameters.keys() == {"x", "y"}:
115
+ if len(signature.parameters) == 2 and signature.parameters.keys() in (
116
+ {"x_map", "y"},
117
+ {"casebase", "query"},
118
+ ):
119
+ return cast(SimMapFunc[KeyType, ValueType, SimType], func)
120
+
121
+ elif len(signature.parameters) == 2:
111
122
  sim_pair_func = cast(SimPairFunc[ValueType, SimType], func)
112
123
 
113
124
  def wrapped_sim_pair_func(
@@ -131,7 +142,9 @@ def sim2map(
131
142
 
132
143
  return wrapped_sim_seq_func
133
144
 
134
- return cast(SimMapFunc[KeyType, ValueType, SimType], func)
145
+ raise TypeError(
146
+ f"Invalid signature for similarity function: {signature.parameters}"
147
+ )
135
148
 
136
149
 
137
150
  def unpack_sim(sim: AnyFloat) -> float:
@@ -8,8 +8,8 @@ from cbrkit.typing import (
8
8
  AnySimFunc,
9
9
  Casebase,
10
10
  KeyType,
11
- RetrieveFunc,
12
11
  SimMap,
12
+ SimMapFunc,
13
13
  SimType,
14
14
  ValueType,
15
15
  )
@@ -76,8 +76,8 @@ class Result(Generic[KeyType, ValueType, SimType]):
76
76
  def apply(
77
77
  casebase: Casebase[KeyType, ValueType],
78
78
  query: ValueType,
79
- retrievers: RetrieveFunc[KeyType, ValueType, SimType]
80
- | Sequence[RetrieveFunc[KeyType, ValueType, SimType]],
79
+ retrievers: SimMapFunc[KeyType, ValueType, SimType]
80
+ | Sequence[SimMapFunc[KeyType, ValueType, SimType]],
81
81
  ) -> Result[KeyType, ValueType, SimType]:
82
82
  """Applies a query to a Casebase using retriever functions.
83
83
 
@@ -135,7 +135,7 @@ def build(
135
135
  limit: int | None = None,
136
136
  min_similarity: float | None = None,
137
137
  max_similarity: float | None = None,
138
- ) -> RetrieveFunc[KeyType, ValueType, SimType]:
138
+ ) -> SimMapFunc[KeyType, ValueType, SimType]:
139
139
  """Based on the similarity function this function creates a retriever function.
140
140
 
141
141
  The given limit will be applied after filtering for min/max similarity.
@@ -174,10 +174,10 @@ def build(
174
174
  sim_func = sim2map(similarity_func)
175
175
 
176
176
  def wrapped_func(
177
- casebase: Casebase[KeyType, ValueType],
178
- query: ValueType,
177
+ x_map: Casebase[KeyType, ValueType],
178
+ y: ValueType,
179
179
  ) -> SimMap[KeyType, SimType]:
180
- similarities = sim_func(casebase, query)
180
+ similarities = sim_func(x_map, y)
181
181
  ranking = _similarities2ranking(similarities)
182
182
 
183
183
  if min_similarity is not None:
@@ -200,11 +200,11 @@ def build(
200
200
 
201
201
  def load(
202
202
  import_names: Sequence[str] | str,
203
- ) -> list[RetrieveFunc[Any, Any, Any]]:
203
+ ) -> list[SimMapFunc[Any, Any, Any]]:
204
204
  if isinstance(import_names, str):
205
205
  import_names = [import_names]
206
206
 
207
- retrievers: list[RetrieveFunc] = []
207
+ retrievers: list[SimMapFunc] = []
208
208
 
209
209
  for import_path in import_names:
210
210
  obj = load_python(import_path)
@@ -220,11 +220,11 @@ def load(
220
220
 
221
221
  def load_map(
222
222
  import_names: Collection[str] | str,
223
- ) -> dict[str, RetrieveFunc[Any, Any, Any]]:
223
+ ) -> dict[str, SimMapFunc[Any, Any, Any]]:
224
224
  if isinstance(import_names, str):
225
225
  import_names = [import_names]
226
226
 
227
- retrievers: dict[str, RetrieveFunc] = {}
227
+ retrievers: dict[str, SimMapFunc] = {}
228
228
 
229
229
  for import_path in import_names:
230
230
  obj = load_python(import_path)
@@ -28,9 +28,10 @@ SimSeq = Sequence[SimType]
28
28
  SimSeqOrMap = SimMap[KeyType, SimType] | SimSeq[SimType]
29
29
 
30
30
 
31
+ # Parameter names must match so that the signature can be inspected, do not add `/` here!
31
32
  class SimMapFunc(Protocol[KeyType, ValueType_contra, SimType_cov]):
32
33
  def __call__(
33
- self, x_map: Mapping[KeyType, ValueType_contra], y: ValueType_contra, /
34
+ self, x_map: Mapping[KeyType, ValueType_contra], y: ValueType_contra
34
35
  ) -> SimMap[KeyType, SimType_cov]:
35
36
  ...
36
37
 
@@ -42,9 +43,8 @@ class SimSeqFunc(Protocol[ValueType_contra, SimType_cov]):
42
43
  ...
43
44
 
44
45
 
45
- # Parameter names must match so that the signature can be inspected, do not add `/` here!
46
46
  class SimPairFunc(Protocol[ValueType_contra, SimType_cov]):
47
- def __call__(self, x: ValueType_contra, y: ValueType_contra) -> SimType_cov:
47
+ def __call__(self, x: ValueType_contra, y: ValueType_contra, /) -> SimType_cov:
48
48
  ...
49
49
 
50
50
 
@@ -54,8 +54,6 @@ AnySimFunc = (
54
54
  | SimPairFunc[ValueType, SimType]
55
55
  )
56
56
 
57
- RetrieveFunc = SimMapFunc[KeyType, ValueType, SimType]
58
-
59
57
 
60
58
  class AggregatorFunc(Protocol[KeyType, SimType_contra]):
61
59
  def __call__(
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cbrkit"
3
- version = "0.6.0"
3
+ version = "0.6.2"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
5
5
  authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
6
6
  license = "MIT"
@@ -52,13 +52,13 @@ sentence-transformers = { version = "^2.2", optional = true }
52
52
  spacy = { version = "^3.7", optional = true }
53
53
  torch = { version = "^2.1.1", optional = true }
54
54
  transformers = { version = "^4.35", optional = true }
55
- typer = { version = "^0.9", extras = ["all"], optional = true }
55
+ typer = { version = ">=0.9, <1.0", extras = ["all"], optional = true }
56
56
  uvicorn = { version = ">=0.24, <1.0", optional = true, extras = ["standard"] }
57
- xmltodict = "^0.13"
57
+ xmltodict = ">=0.13, <1.0"
58
58
 
59
59
  [tool.poetry.group.dev.dependencies]
60
60
  pytest = "^8.0.0"
61
- pytest-cov = "^4.1"
61
+ pytest-cov = "^5.0.0"
62
62
 
63
63
  [tool.poetry.group.docs.dependencies]
64
64
  pdoc = "^14.4"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes