cbrkit 0.6.0__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cbrkit-0.6.0 → cbrkit-0.6.2}/PKG-INFO +13 -16
- {cbrkit-0.6.0 → cbrkit-0.6.2}/README.md +10 -13
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/helpers.py +16 -3
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/retrieval.py +11 -11
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/typing.py +3 -5
- {cbrkit-0.6.0 → cbrkit-0.6.2}/pyproject.toml +4 -4
- {cbrkit-0.6.0 → cbrkit-0.6.2}/LICENSE +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/__init__.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/__main__.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/api.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/cli.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/loaders.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/py.typed +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/__init__.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/_aggregator.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/_attribute_value.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/collections.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/generic.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/graph/__init__.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/graph/_astar.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/graph/_model.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/numbers.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/strings/__init__.py +0 -0
- {cbrkit-0.6.0 → cbrkit-0.6.2}/cbrkit/sim/strings/taxonomy.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cbrkit
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
|
|
5
5
|
Home-page: https://wi2trier.github.io/cbrkit/
|
|
6
6
|
License: MIT
|
|
@@ -41,9 +41,9 @@ Requires-Dist: sentence-transformers (>=2.2,<3.0) ; extra == "all" or extra == "
|
|
|
41
41
|
Requires-Dist: spacy (>=3.7,<4.0) ; extra == "all" or extra == "all" or extra == "nlp"
|
|
42
42
|
Requires-Dist: torch (>=2.1.1,<3.0.0) ; extra == "all" or extra == "transformers"
|
|
43
43
|
Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
|
|
44
|
-
Requires-Dist: typer[all] (>=0.9,<0
|
|
44
|
+
Requires-Dist: typer[all] (>=0.9,<1.0) ; extra == "all" or extra == "cli"
|
|
45
45
|
Requires-Dist: uvicorn[standard] (>=0.24,<1.0) ; extra == "all" or extra == "api"
|
|
46
|
-
Requires-Dist: xmltodict (>=0.13,<0
|
|
46
|
+
Requires-Dist: xmltodict (>=0.13,<1.0)
|
|
47
47
|
Project-URL: Repository, https://github.com/wi2trier/cbrkit
|
|
48
48
|
Description-Content-Type: text/markdown
|
|
49
49
|
|
|
@@ -161,7 +161,7 @@ It is possible to define custom measures, use built-in ones, or combine both.
|
|
|
161
161
|
In CBRkit, a similarity measure is defined as a function that takes two arguments (a case and a query) and returns a similarity score: `sim = f(x, y)`.
|
|
162
162
|
It also supports pipeline-based similarity measures that are popular in NLP where a list of tuples is passed to the similarity measure: `sims = f([(x1, y1), (x2, y2), ...])`.
|
|
163
163
|
This generic approach allows you to define custom similarity measures for your specific use case.
|
|
164
|
-
For instance,
|
|
164
|
+
For instance, the following function not only checks for strict equality, but also for partial matches (e.g., `x = "blue"` and `y = "light blue"`):
|
|
165
165
|
|
|
166
166
|
```python
|
|
167
167
|
def color_similarity(x: str, y: str) -> float:
|
|
@@ -173,7 +173,8 @@ def color_similarity(x: str, y: str) -> float:
|
|
|
173
173
|
return 0.0
|
|
174
174
|
```
|
|
175
175
|
|
|
176
|
-
|
|
176
|
+
**Please note:** CBRkit inspects the signature of custom similarity functions to perform some checks.
|
|
177
|
+
You need to make sure that the two parameters are named `x` and `y`, otherwise CBRkit will throw an error.
|
|
177
178
|
|
|
178
179
|
### Built-in Similarity Measures
|
|
179
180
|
|
|
@@ -207,7 +208,7 @@ For the common use case of attribute-value based data, CBRkit provides a predefi
|
|
|
207
208
|
cbrkit.sim.attribute_value(
|
|
208
209
|
attributes={
|
|
209
210
|
"price": cbrkit.sim.numbers.linear(),
|
|
210
|
-
"color": color_similarity
|
|
211
|
+
"color": color_similarity # custom measure
|
|
211
212
|
...
|
|
212
213
|
},
|
|
213
214
|
aggregator=cbrkit.sim.aggregator(pooling="mean"),
|
|
@@ -216,7 +217,8 @@ cbrkit.sim.attribute_value(
|
|
|
216
217
|
|
|
217
218
|
The `attribute_value` function lets you define measures for each attribute of the cases/queries as well as the aggregation function.
|
|
218
219
|
It also allows you to use custom measures like the `color_similarity` function defined above.
|
|
219
|
-
|
|
220
|
+
|
|
221
|
+
**Please note:** The custom measure is not executed (i.e., there are **no** parentheses at the end), but instead passed as a reference to the `attribute_value` function.
|
|
220
222
|
|
|
221
223
|
You may even nest similarity functions to create measures for object-oriented cases:
|
|
222
224
|
|
|
@@ -230,7 +232,7 @@ cbrkit.sim.attribute_value(
|
|
|
230
232
|
},
|
|
231
233
|
aggregator=cbrkit.sim.aggregator(pooling="mean"),
|
|
232
234
|
),
|
|
233
|
-
"color": color_similarity
|
|
235
|
+
"color": color_similarity # custom measure
|
|
234
236
|
...
|
|
235
237
|
},
|
|
236
238
|
aggregator=cbrkit.sim.aggregator(pooling="mean"),
|
|
@@ -268,19 +270,14 @@ In some cases, it is useful to combine multiple retrieval pipelines, for example
|
|
|
268
270
|
To use this pattern, first create the corresponding retrievers using the builder:
|
|
269
271
|
|
|
270
272
|
```python
|
|
271
|
-
retriever1 = cbrkit.retrieval.build(..., limit=
|
|
272
|
-
|
|
273
|
-
retriever2 = cbrkit.retrieval.build(..., limit=None)
|
|
273
|
+
retriever1 = cbrkit.retrieval.build(..., min_similarity=0.5, limit=20)
|
|
274
|
+
retriever2 = cbrkit.retrieval.build(..., limit=10)
|
|
274
275
|
```
|
|
275
276
|
|
|
276
277
|
Then apply all of them sequentially by passing them as a list or tuple to the `apply` function:
|
|
277
278
|
|
|
278
279
|
```python
|
|
279
|
-
result = cbrkit.retrieval.apply(
|
|
280
|
-
casebase,
|
|
281
|
-
query,
|
|
282
|
-
(retriever1, retriever2)
|
|
283
|
-
)
|
|
280
|
+
result = cbrkit.retrieval.apply(casebase, query, (retriever1, retriever2))
|
|
284
281
|
```
|
|
285
282
|
|
|
286
283
|
The result has the following two attributes:
|
|
@@ -112,7 +112,7 @@ It is possible to define custom measures, use built-in ones, or combine both.
|
|
|
112
112
|
In CBRkit, a similarity measure is defined as a function that takes two arguments (a case and a query) and returns a similarity score: `sim = f(x, y)`.
|
|
113
113
|
It also supports pipeline-based similarity measures that are popular in NLP where a list of tuples is passed to the similarity measure: `sims = f([(x1, y1), (x2, y2), ...])`.
|
|
114
114
|
This generic approach allows you to define custom similarity measures for your specific use case.
|
|
115
|
-
For instance,
|
|
115
|
+
For instance, the following function not only checks for strict equality, but also for partial matches (e.g., `x = "blue"` and `y = "light blue"`):
|
|
116
116
|
|
|
117
117
|
```python
|
|
118
118
|
def color_similarity(x: str, y: str) -> float:
|
|
@@ -124,7 +124,8 @@ def color_similarity(x: str, y: str) -> float:
|
|
|
124
124
|
return 0.0
|
|
125
125
|
```
|
|
126
126
|
|
|
127
|
-
|
|
127
|
+
**Please note:** CBRkit inspects the signature of custom similarity functions to perform some checks.
|
|
128
|
+
You need to make sure that the two parameters are named `x` and `y`, otherwise CBRkit will throw an error.
|
|
128
129
|
|
|
129
130
|
### Built-in Similarity Measures
|
|
130
131
|
|
|
@@ -158,7 +159,7 @@ For the common use case of attribute-value based data, CBRkit provides a predefi
|
|
|
158
159
|
cbrkit.sim.attribute_value(
|
|
159
160
|
attributes={
|
|
160
161
|
"price": cbrkit.sim.numbers.linear(),
|
|
161
|
-
"color": color_similarity
|
|
162
|
+
"color": color_similarity # custom measure
|
|
162
163
|
...
|
|
163
164
|
},
|
|
164
165
|
aggregator=cbrkit.sim.aggregator(pooling="mean"),
|
|
@@ -167,7 +168,8 @@ cbrkit.sim.attribute_value(
|
|
|
167
168
|
|
|
168
169
|
The `attribute_value` function lets you define measures for each attribute of the cases/queries as well as the aggregation function.
|
|
169
170
|
It also allows you to use custom measures like the `color_similarity` function defined above.
|
|
170
|
-
|
|
171
|
+
|
|
172
|
+
**Please note:** The custom measure is not executed (i.e., there are **no** parentheses at the end), but instead passed as a reference to the `attribute_value` function.
|
|
171
173
|
|
|
172
174
|
You may even nest similarity functions to create measures for object-oriented cases:
|
|
173
175
|
|
|
@@ -181,7 +183,7 @@ cbrkit.sim.attribute_value(
|
|
|
181
183
|
},
|
|
182
184
|
aggregator=cbrkit.sim.aggregator(pooling="mean"),
|
|
183
185
|
),
|
|
184
|
-
"color": color_similarity
|
|
186
|
+
"color": color_similarity # custom measure
|
|
185
187
|
...
|
|
186
188
|
},
|
|
187
189
|
aggregator=cbrkit.sim.aggregator(pooling="mean"),
|
|
@@ -219,19 +221,14 @@ In some cases, it is useful to combine multiple retrieval pipelines, for example
|
|
|
219
221
|
To use this pattern, first create the corresponding retrievers using the builder:
|
|
220
222
|
|
|
221
223
|
```python
|
|
222
|
-
retriever1 = cbrkit.retrieval.build(..., limit=
|
|
223
|
-
|
|
224
|
-
retriever2 = cbrkit.retrieval.build(..., limit=None)
|
|
224
|
+
retriever1 = cbrkit.retrieval.build(..., min_similarity=0.5, limit=20)
|
|
225
|
+
retriever2 = cbrkit.retrieval.build(..., limit=10)
|
|
225
226
|
```
|
|
226
227
|
|
|
227
228
|
Then apply all of them sequentially by passing them as a list or tuple to the `apply` function:
|
|
228
229
|
|
|
229
230
|
```python
|
|
230
|
-
result = cbrkit.retrieval.apply(
|
|
231
|
-
casebase,
|
|
232
|
-
query,
|
|
233
|
-
(retriever1, retriever2)
|
|
234
|
-
)
|
|
231
|
+
result = cbrkit.retrieval.apply(casebase, query, (retriever1, retriever2))
|
|
235
232
|
```
|
|
236
233
|
|
|
237
234
|
The result has the following two attributes:
|
|
@@ -97,7 +97,12 @@ def sim2seq(
|
|
|
97
97
|
|
|
98
98
|
return wrapped_func
|
|
99
99
|
|
|
100
|
-
|
|
100
|
+
elif len(signature.parameters) == 1:
|
|
101
|
+
return cast(SimSeqFunc[ValueType, SimType], func)
|
|
102
|
+
|
|
103
|
+
raise TypeError(
|
|
104
|
+
f"Invalid signature for similarity function: {signature.parameters}"
|
|
105
|
+
)
|
|
101
106
|
|
|
102
107
|
|
|
103
108
|
def sim2map(
|
|
@@ -107,7 +112,13 @@ def sim2map(
|
|
|
107
112
|
) -> SimMapFunc[KeyType, ValueType, SimType]:
|
|
108
113
|
signature = inspect_signature(func)
|
|
109
114
|
|
|
110
|
-
if len(signature.parameters) == 2 and signature.parameters.keys()
|
|
115
|
+
if len(signature.parameters) == 2 and signature.parameters.keys() in (
|
|
116
|
+
{"x_map", "y"},
|
|
117
|
+
{"casebase", "query"},
|
|
118
|
+
):
|
|
119
|
+
return cast(SimMapFunc[KeyType, ValueType, SimType], func)
|
|
120
|
+
|
|
121
|
+
elif len(signature.parameters) == 2:
|
|
111
122
|
sim_pair_func = cast(SimPairFunc[ValueType, SimType], func)
|
|
112
123
|
|
|
113
124
|
def wrapped_sim_pair_func(
|
|
@@ -131,7 +142,9 @@ def sim2map(
|
|
|
131
142
|
|
|
132
143
|
return wrapped_sim_seq_func
|
|
133
144
|
|
|
134
|
-
|
|
145
|
+
raise TypeError(
|
|
146
|
+
f"Invalid signature for similarity function: {signature.parameters}"
|
|
147
|
+
)
|
|
135
148
|
|
|
136
149
|
|
|
137
150
|
def unpack_sim(sim: AnyFloat) -> float:
|
|
@@ -8,8 +8,8 @@ from cbrkit.typing import (
|
|
|
8
8
|
AnySimFunc,
|
|
9
9
|
Casebase,
|
|
10
10
|
KeyType,
|
|
11
|
-
RetrieveFunc,
|
|
12
11
|
SimMap,
|
|
12
|
+
SimMapFunc,
|
|
13
13
|
SimType,
|
|
14
14
|
ValueType,
|
|
15
15
|
)
|
|
@@ -76,8 +76,8 @@ class Result(Generic[KeyType, ValueType, SimType]):
|
|
|
76
76
|
def apply(
|
|
77
77
|
casebase: Casebase[KeyType, ValueType],
|
|
78
78
|
query: ValueType,
|
|
79
|
-
retrievers:
|
|
80
|
-
| Sequence[
|
|
79
|
+
retrievers: SimMapFunc[KeyType, ValueType, SimType]
|
|
80
|
+
| Sequence[SimMapFunc[KeyType, ValueType, SimType]],
|
|
81
81
|
) -> Result[KeyType, ValueType, SimType]:
|
|
82
82
|
"""Applies a query to a Casebase using retriever functions.
|
|
83
83
|
|
|
@@ -135,7 +135,7 @@ def build(
|
|
|
135
135
|
limit: int | None = None,
|
|
136
136
|
min_similarity: float | None = None,
|
|
137
137
|
max_similarity: float | None = None,
|
|
138
|
-
) ->
|
|
138
|
+
) -> SimMapFunc[KeyType, ValueType, SimType]:
|
|
139
139
|
"""Based on the similarity function this function creates a retriever function.
|
|
140
140
|
|
|
141
141
|
The given limit will be applied after filtering for min/max similarity.
|
|
@@ -174,10 +174,10 @@ def build(
|
|
|
174
174
|
sim_func = sim2map(similarity_func)
|
|
175
175
|
|
|
176
176
|
def wrapped_func(
|
|
177
|
-
|
|
178
|
-
|
|
177
|
+
x_map: Casebase[KeyType, ValueType],
|
|
178
|
+
y: ValueType,
|
|
179
179
|
) -> SimMap[KeyType, SimType]:
|
|
180
|
-
similarities = sim_func(
|
|
180
|
+
similarities = sim_func(x_map, y)
|
|
181
181
|
ranking = _similarities2ranking(similarities)
|
|
182
182
|
|
|
183
183
|
if min_similarity is not None:
|
|
@@ -200,11 +200,11 @@ def build(
|
|
|
200
200
|
|
|
201
201
|
def load(
|
|
202
202
|
import_names: Sequence[str] | str,
|
|
203
|
-
) -> list[
|
|
203
|
+
) -> list[SimMapFunc[Any, Any, Any]]:
|
|
204
204
|
if isinstance(import_names, str):
|
|
205
205
|
import_names = [import_names]
|
|
206
206
|
|
|
207
|
-
retrievers: list[
|
|
207
|
+
retrievers: list[SimMapFunc] = []
|
|
208
208
|
|
|
209
209
|
for import_path in import_names:
|
|
210
210
|
obj = load_python(import_path)
|
|
@@ -220,11 +220,11 @@ def load(
|
|
|
220
220
|
|
|
221
221
|
def load_map(
|
|
222
222
|
import_names: Collection[str] | str,
|
|
223
|
-
) -> dict[str,
|
|
223
|
+
) -> dict[str, SimMapFunc[Any, Any, Any]]:
|
|
224
224
|
if isinstance(import_names, str):
|
|
225
225
|
import_names = [import_names]
|
|
226
226
|
|
|
227
|
-
retrievers: dict[str,
|
|
227
|
+
retrievers: dict[str, SimMapFunc] = {}
|
|
228
228
|
|
|
229
229
|
for import_path in import_names:
|
|
230
230
|
obj = load_python(import_path)
|
|
@@ -28,9 +28,10 @@ SimSeq = Sequence[SimType]
|
|
|
28
28
|
SimSeqOrMap = SimMap[KeyType, SimType] | SimSeq[SimType]
|
|
29
29
|
|
|
30
30
|
|
|
31
|
+
# Parameter names must match so that the signature can be inspected, do not add `/` here!
|
|
31
32
|
class SimMapFunc(Protocol[KeyType, ValueType_contra, SimType_cov]):
|
|
32
33
|
def __call__(
|
|
33
|
-
self, x_map: Mapping[KeyType, ValueType_contra], y: ValueType_contra
|
|
34
|
+
self, x_map: Mapping[KeyType, ValueType_contra], y: ValueType_contra
|
|
34
35
|
) -> SimMap[KeyType, SimType_cov]:
|
|
35
36
|
...
|
|
36
37
|
|
|
@@ -42,9 +43,8 @@ class SimSeqFunc(Protocol[ValueType_contra, SimType_cov]):
|
|
|
42
43
|
...
|
|
43
44
|
|
|
44
45
|
|
|
45
|
-
# Parameter names must match so that the signature can be inspected, do not add `/` here!
|
|
46
46
|
class SimPairFunc(Protocol[ValueType_contra, SimType_cov]):
|
|
47
|
-
def __call__(self, x: ValueType_contra, y: ValueType_contra) -> SimType_cov:
|
|
47
|
+
def __call__(self, x: ValueType_contra, y: ValueType_contra, /) -> SimType_cov:
|
|
48
48
|
...
|
|
49
49
|
|
|
50
50
|
|
|
@@ -54,8 +54,6 @@ AnySimFunc = (
|
|
|
54
54
|
| SimPairFunc[ValueType, SimType]
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
-
RetrieveFunc = SimMapFunc[KeyType, ValueType, SimType]
|
|
58
|
-
|
|
59
57
|
|
|
60
58
|
class AggregatorFunc(Protocol[KeyType, SimType_contra]):
|
|
61
59
|
def __call__(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "cbrkit"
|
|
3
|
-
version = "0.6.0"
|
|
3
|
+
version = "0.6.2"
|
|
4
4
|
description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
|
|
5
5
|
authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -52,13 +52,13 @@ sentence-transformers = { version = "^2.2", optional = true }
|
|
|
52
52
|
spacy = { version = "^3.7", optional = true }
|
|
53
53
|
torch = { version = "^2.1.1", optional = true }
|
|
54
54
|
transformers = { version = "^4.35", optional = true }
|
|
55
|
-
typer = { version = "
|
|
55
|
+
typer = { version = ">=0.9, <1.0", extras = ["all"], optional = true }
|
|
56
56
|
uvicorn = { version = ">=0.24, <1.0", optional = true, extras = ["standard"] }
|
|
57
|
-
xmltodict = "
|
|
57
|
+
xmltodict = ">=0.13, <1.0"
|
|
58
58
|
|
|
59
59
|
[tool.poetry.group.dev.dependencies]
|
|
60
60
|
pytest = "^8.0.0"
|
|
61
|
-
pytest-cov = "^
|
|
61
|
+
pytest-cov = "^5.0.0"
|
|
62
62
|
|
|
63
63
|
[tool.poetry.group.docs.dependencies]
|
|
64
64
|
pdoc = "^14.4"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|