cbrkit 0.19.2__tar.gz → 0.20.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cbrkit-0.19.2 → cbrkit-0.20.0}/PKG-INFO +77 -58
- {cbrkit-0.19.2 → cbrkit-0.20.0}/README.md +27 -15
- cbrkit-0.20.0/pyproject.toml +111 -0
- cbrkit-0.20.0/setup.cfg +4 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/__init__.py +4 -0
- cbrkit-0.20.0/src/cbrkit/__main__.py +3 -0
- cbrkit-0.20.0/src/cbrkit/adapt/__init__.py +18 -0
- cbrkit-0.20.0/src/cbrkit/adapt/_attribute_value.py +90 -0
- cbrkit-0.20.0/src/cbrkit/adapt/generic.py +86 -0
- cbrkit-0.20.0/src/cbrkit/adapt/numbers.py +52 -0
- cbrkit-0.20.0/src/cbrkit/adapt/strings.py +103 -0
- cbrkit-0.20.0/src/cbrkit/api.py +127 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/cli.py +34 -12
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/_common.py +2 -2
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/_retrieval.py +2 -3
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/helpers.py +87 -5
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/loaders.py +71 -103
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/retrieval.py +16 -65
- cbrkit-0.20.0/src/cbrkit/reuse.py +345 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/__init__.py +1 -2
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/_aggregator.py +6 -5
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/_attribute_value.py +15 -26
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/collections.py +2 -2
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/generic.py +2 -2
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_isomorphism.py +8 -7
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/numbers.py +2 -2
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/strings/__init__.py +7 -10
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/strings/taxonomy.py +3 -3
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/typing.py +36 -9
- cbrkit-0.20.0/src/cbrkit.egg-info/PKG-INFO +390 -0
- cbrkit-0.20.0/src/cbrkit.egg-info/SOURCES.txt +42 -0
- cbrkit-0.20.0/src/cbrkit.egg-info/dependency_links.txt +1 -0
- cbrkit-0.20.0/src/cbrkit.egg-info/entry_points.txt +2 -0
- cbrkit-0.20.0/src/cbrkit.egg-info/requires.txt +74 -0
- cbrkit-0.20.0/src/cbrkit.egg-info/top_level.txt +1 -0
- cbrkit-0.20.0/tests/test_cycle.py +51 -0
- cbrkit-0.20.0/tests/test_retrieve.py +159 -0
- cbrkit-0.20.0/tests/test_reuse.py +158 -0
- cbrkit-0.19.2/cbrkit/__main__.py +0 -3
- cbrkit-0.19.2/cbrkit/adaptation.py +0 -17
- cbrkit-0.19.2/cbrkit/api.py +0 -74
- cbrkit-0.19.2/pyproject.toml +0 -130
- {cbrkit-0.19.2 → cbrkit-0.20.0}/LICENSE +0 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/__init__.py +0 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/py.typed +0 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/__init__.py +0 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_astar.py +0 -0
- {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_model.py +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cbrkit
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
Version: 0.20.0
|
|
4
|
+
Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
|
|
5
|
+
Author-email: Mirko Lenz <mirko@mirkolenz.com>
|
|
6
|
+
Project-URL: Repository, https://github.com/wi2trier/cbrkit
|
|
7
|
+
Project-URL: Documentation, https://wi2trier.github.io/cbrkit/
|
|
8
|
+
Project-URL: Issues, https://github.com/wi2trier/cbrkit/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/wi2trier/cbrkit/releases
|
|
7
10
|
Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
|
|
8
|
-
Author: Mirko Lenz
|
|
9
|
-
Author-email: mirko@mirkolenz.com
|
|
10
|
-
Requires-Python: >=3.12,<4.0
|
|
11
11
|
Classifier: Development Status :: 4 - Beta
|
|
12
12
|
Classifier: Environment :: Console
|
|
13
13
|
Classifier: Framework :: Pytest
|
|
@@ -16,56 +16,64 @@ Classifier: Intended Audience :: Science/Research
|
|
|
16
16
|
Classifier: License :: OSI Approved :: MIT License
|
|
17
17
|
Classifier: Natural Language :: English
|
|
18
18
|
Classifier: Operating System :: OS Independent
|
|
19
|
-
Classifier: Programming Language :: Python :: 3
|
|
20
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
21
20
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3
|
|
22
22
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
23
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
24
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
25
|
Classifier: Topic :: Utilities
|
|
26
26
|
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.12
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Requires-Dist: immutables<1,>=0.21
|
|
31
|
+
Requires-Dist: orjson<4,>=3
|
|
32
|
+
Requires-Dist: polars<2,>=1
|
|
33
|
+
Requires-Dist: pyyaml<7,>=6
|
|
34
|
+
Requires-Dist: xmltodict<1,>=0.13
|
|
27
35
|
Provides-Extra: all
|
|
36
|
+
Requires-Dist: cbrkit[api,cli,eval,graphs,llm,nlp,timeseries,transformers]; extra == "all"
|
|
37
|
+
Requires-Dist: numpy<2,>=1; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "all"
|
|
38
|
+
Requires-Dist: numpy<3,>=2; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "all"
|
|
39
|
+
Requires-Dist: numpy<3,>=2; sys_platform == "linux" and extra == "all"
|
|
40
|
+
Requires-Dist: pandas<3,>=2; extra == "all"
|
|
41
|
+
Requires-Dist: pydantic<3,>=2; extra == "all"
|
|
42
|
+
Requires-Dist: scipy<2,>=1; extra == "all"
|
|
28
43
|
Provides-Extra: api
|
|
44
|
+
Requires-Dist: cbrkit[cli]; extra == "api"
|
|
45
|
+
Requires-Dist: fastapi<1,>=0.100; extra == "api"
|
|
46
|
+
Requires-Dist: pydantic-settings<3,>=2; extra == "api"
|
|
47
|
+
Requires-Dist: uvicorn[standard]<1,>=0.30; extra == "api"
|
|
29
48
|
Provides-Extra: cli
|
|
49
|
+
Requires-Dist: rich<14,>=13; extra == "cli"
|
|
50
|
+
Requires-Dist: typer<1,>=0.9; extra == "cli"
|
|
30
51
|
Provides-Extra: eval
|
|
52
|
+
Requires-Dist: ranx<1,>=0.3; extra == "eval"
|
|
53
|
+
Provides-Extra: graphs
|
|
54
|
+
Requires-Dist: networkx<4,>=3; extra == "graphs"
|
|
55
|
+
Requires-Dist: rustworkx<1,>=0.15; extra == "graphs"
|
|
31
56
|
Provides-Extra: llm
|
|
57
|
+
Requires-Dist: cohere<6,>=5; extra == "llm"
|
|
58
|
+
Requires-Dist: ollama<1,>=0.3; extra == "llm"
|
|
59
|
+
Requires-Dist: openai<2,>=1; extra == "llm"
|
|
32
60
|
Provides-Extra: nlp
|
|
61
|
+
Requires-Dist: levenshtein<0.26,>=0.23; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "nlp"
|
|
62
|
+
Requires-Dist: levenshtein<1,>=0.26; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "nlp"
|
|
63
|
+
Requires-Dist: levenshtein<1,>=0.26; sys_platform == "linux" and extra == "nlp"
|
|
64
|
+
Requires-Dist: nltk<4,>=3; extra == "nlp"
|
|
65
|
+
Requires-Dist: spacy<3.8,>=3.7; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "nlp"
|
|
66
|
+
Requires-Dist: spacy<4,>=3.8; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "nlp"
|
|
67
|
+
Requires-Dist: spacy<4,>=3.8; sys_platform == "linux" and extra == "nlp"
|
|
33
68
|
Provides-Extra: timeseries
|
|
69
|
+
Requires-Dist: minineedle<4,>=3; extra == "timeseries"
|
|
70
|
+
Requires-Dist: dtaidistance<3,>=2; extra == "timeseries"
|
|
34
71
|
Provides-Extra: transformers
|
|
35
|
-
Requires-Dist:
|
|
36
|
-
Requires-Dist:
|
|
37
|
-
Requires-Dist:
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist:
|
|
40
|
-
Requires-Dist: levenshtein (>=0.23,<1) ; (sys_platform == "darwin" and platform_machine == "arm64") and (extra == "all" or extra == "nlp")
|
|
41
|
-
Requires-Dist: levenshtein (>=0.23,<1) ; (sys_platform == "linux") and (extra == "all" or extra == "nlp")
|
|
42
|
-
Requires-Dist: minineedle (>=3.1,<4.0) ; extra == "all" or extra == "timeseries"
|
|
43
|
-
Requires-Dist: networkx (>=3.0,<4.0) ; extra == "all"
|
|
44
|
-
Requires-Dist: nltk (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
|
|
45
|
-
Requires-Dist: numpy (>=1.26,<3.0) ; extra == "all"
|
|
46
|
-
Requires-Dist: ollama (>=0.3,<1) ; extra == "all" or extra == "llm"
|
|
47
|
-
Requires-Dist: openai (>=1.50,<2.0) ; extra == "all" or extra == "llm"
|
|
48
|
-
Requires-Dist: orjson (>=3.9,<4.0)
|
|
49
|
-
Requires-Dist: pandas (>=2.1,<3.0)
|
|
50
|
-
Requires-Dist: polars (>=1.5,<2.0) ; extra == "all"
|
|
51
|
-
Requires-Dist: pyarrow (>=13.0)
|
|
52
|
-
Requires-Dist: pydantic (>=2.0,<3.0)
|
|
53
|
-
Requires-Dist: pyyaml (>=6.0,<7.0)
|
|
54
|
-
Requires-Dist: ranx (>=0.3,<1.0) ; extra == "all" or extra == "eval"
|
|
55
|
-
Requires-Dist: rich (>=13.7,<14.0) ; extra == "all" or extra == "api" or extra == "cli"
|
|
56
|
-
Requires-Dist: rustworkx (>=0.15,<1.0)
|
|
57
|
-
Requires-Dist: scipy (>=1.12,<2.0) ; extra == "all"
|
|
58
|
-
Requires-Dist: sentence-transformers (>=2.2,<4.0) ; extra == "all" or extra == "transformers"
|
|
59
|
-
Requires-Dist: spacy (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
|
|
60
|
-
Requires-Dist: torch (>=2.2,<2.3) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "all" or extra == "transformers")
|
|
61
|
-
Requires-Dist: torch (>=2.2,<3.0) ; (sys_platform == "darwin" and platform_machine == "arm64") and (extra == "all" or extra == "transformers")
|
|
62
|
-
Requires-Dist: torch (>=2.2,<3.0) ; (sys_platform == "linux") and (extra == "all" or extra == "transformers")
|
|
63
|
-
Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
|
|
64
|
-
Requires-Dist: typer (>=0.9,<1.0) ; extra == "all" or extra == "api" or extra == "cli"
|
|
65
|
-
Requires-Dist: uvicorn[standard] (>=0.24,<1.0) ; extra == "all" or extra == "api"
|
|
66
|
-
Requires-Dist: xmltodict (>=0.13,<1.0)
|
|
67
|
-
Project-URL: Repository, https://github.com/wi2trier/cbrkit
|
|
68
|
-
Description-Content-Type: text/markdown
|
|
72
|
+
Requires-Dist: sentence-transformers<4,>=3; extra == "transformers"
|
|
73
|
+
Requires-Dist: torch<2.3,>=2.2; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "transformers"
|
|
74
|
+
Requires-Dist: torch<3,>=2.5; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "transformers"
|
|
75
|
+
Requires-Dist: torch<3,>=2.5; sys_platform == "linux" and extra == "transformers"
|
|
76
|
+
Requires-Dist: transformers<5,>=4; extra == "transformers"
|
|
69
77
|
|
|
70
78
|
<!-- markdownlint-disable MD033 MD041 -->
|
|
71
79
|
<h2><p align="center">CBRkit</p></h2>
|
|
@@ -109,6 +117,8 @@ The following modules are part of CBRkit:
|
|
|
109
117
|
- `cbrkit.loaders`: Functions for loading cases and queries.
|
|
110
118
|
- `cbrkit.sim`: Similarity generator functions for common data types like strings and numbers.
|
|
111
119
|
- `cbrkit.retrieval`: Functions for defining and applying retrieval pipelines.
|
|
120
|
+
- `cbrkit.adapt`: Adaptation generator functions for adapting cases based on a query.
|
|
121
|
+
- `cbrkit.reuse`: Functions for defining and applying reuse pipelines.
|
|
112
122
|
- `cbrkit.typing`: Generic type definitions for defining custom functions.
|
|
113
123
|
|
|
114
124
|
## Installation
|
|
@@ -127,25 +137,28 @@ pip install cbrkit[EXTRA_NAME,...]
|
|
|
127
137
|
|
|
128
138
|
where `EXTRA_NAME` is one of the following:
|
|
129
139
|
|
|
140
|
+
- `all`: All optional dependencies
|
|
141
|
+
- `api`: REST API Server
|
|
142
|
+
- `cli`: Command Line Interface (CLI)
|
|
143
|
+
- `eval`: Evaluation tools for common metrics like `precision` and `recall`
|
|
144
|
+
- `llm`: Large Language Models (LLM) APIs like Ollama and OpenAI
|
|
130
145
|
- `nlp`: Standalone NLP tools `levenshtein`, `nltk`, and `spacy`
|
|
146
|
+
- `timeseries`: Time series similarity measures like `dtw` and `smith_waterman`
|
|
131
147
|
- `transformers`: Advanced NLP tools based on `pytorch` and `transformers`
|
|
132
|
-
- `cli`: Command Line Interface (CLI)
|
|
133
|
-
- `api`: REST API Server
|
|
134
|
-
- `all`: All of the above
|
|
135
148
|
|
|
136
149
|
## Loading Cases
|
|
137
150
|
|
|
138
151
|
The first step is to load cases and queries.
|
|
139
152
|
We provide predefined functions for the most common formats like CSV, JSON, and XML.
|
|
140
|
-
Additionally, CBRkit also integrates with `pandas` for loading data frames.
|
|
141
|
-
The following example shows how to load cases and queries from a CSV file using `
|
|
153
|
+
Additionally, CBRkit also integrates with `polars` and `pandas` for loading data frames.
|
|
154
|
+
The following example shows how to load cases and queries from a CSV file using `polars`:
|
|
142
155
|
|
|
143
156
|
```python
|
|
144
|
-
import
|
|
157
|
+
import polars as pl
|
|
145
158
|
import cbrkit
|
|
146
159
|
|
|
147
|
-
df =
|
|
148
|
-
casebase = cbrkit.loaders.
|
|
160
|
+
df = pl.read_csv("path/to/cases.csv")
|
|
161
|
+
casebase = cbrkit.loaders.polars(df)
|
|
149
162
|
```
|
|
150
163
|
|
|
151
164
|
When dealing with formats like JSON, the files can be loaded directly:
|
|
@@ -160,17 +173,14 @@ CBRkit expects the type of the queries to match the type of the cases.
|
|
|
160
173
|
You may define a single query directly in Python as follows:
|
|
161
174
|
|
|
162
175
|
```python
|
|
163
|
-
# for pandas
|
|
164
|
-
query = pd.Series({"name": "John", "age": 25})
|
|
165
|
-
# for json
|
|
166
176
|
query = {"name": "John", "age": 25}
|
|
167
177
|
```
|
|
168
178
|
|
|
169
179
|
If you have a collection of queries, you can load them using the same loader functions as for the cases.
|
|
170
180
|
|
|
171
181
|
```python
|
|
172
|
-
# for
|
|
173
|
-
queries = cbrkit.loaders.
|
|
182
|
+
# for polars
|
|
183
|
+
queries = cbrkit.loaders.polars(pl.read_csv("path/to/queries.csv"))
|
|
174
184
|
# for json
|
|
175
185
|
queries = cbrkit.loaders.json("path/to/queries.json")
|
|
176
186
|
```
|
|
@@ -294,8 +304,6 @@ Our result has the following attributes:
|
|
|
294
304
|
- `ranking` A list of case indices sorted by their similarity score.
|
|
295
305
|
- `casebase` The casebase containing only the retrieved cases (useful for downstream tasks).
|
|
296
306
|
|
|
297
|
-
## Combining Multiple Retrieval Pipelines
|
|
298
|
-
|
|
299
307
|
In some cases, it is useful to combine multiple retrieval pipelines, for example when applying the MAC/FAC pattern where a cheap pre-filter is applied to the whole casebase before a more expensive similarity measure is applied on the remaining cases.
|
|
300
308
|
To use this pattern, first create the corresponding retrievers using the builder:
|
|
301
309
|
|
|
@@ -318,6 +326,18 @@ The result has the following two attributes:
|
|
|
318
326
|
Both `final` and each entry in `steps` have the same attributes as discussed previously.
|
|
319
327
|
The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
|
|
320
328
|
|
|
329
|
+
## Adaptation Functions
|
|
330
|
+
|
|
331
|
+
Coming soon...
|
|
332
|
+
|
|
333
|
+
## Reuse
|
|
334
|
+
|
|
335
|
+
Coming soon...
|
|
336
|
+
|
|
337
|
+
## Evaluation
|
|
338
|
+
|
|
339
|
+
Coming soon...
|
|
340
|
+
|
|
321
341
|
## REST API and CLI
|
|
322
342
|
|
|
323
343
|
In order to use the built-in API and CLI, you need to define a retriever in a Python module using the function `cbrkit.retrieval.build()`.
|
|
@@ -368,4 +388,3 @@ It offers a single endpoint `/retrieve` that accepts POST requests with a JSON b
|
|
|
368
388
|
```
|
|
369
389
|
|
|
370
390
|
The server will return a JSON object containing the retrieval results for each query.
|
|
371
|
-
|
|
@@ -40,6 +40,8 @@ The following modules are part of CBRkit:
|
|
|
40
40
|
- `cbrkit.loaders`: Functions for loading cases and queries.
|
|
41
41
|
- `cbrkit.sim`: Similarity generator functions for common data types like strings and numbers.
|
|
42
42
|
- `cbrkit.retrieval`: Functions for defining and applying retrieval pipelines.
|
|
43
|
+
- `cbrkit.adapt`: Adaptation generator functions for adapting cases based on a query.
|
|
44
|
+
- `cbrkit.reuse`: Functions for defining and applying reuse pipelines.
|
|
43
45
|
- `cbrkit.typing`: Generic type definitions for defining custom functions.
|
|
44
46
|
|
|
45
47
|
## Installation
|
|
@@ -58,25 +60,28 @@ pip install cbrkit[EXTRA_NAME,...]
|
|
|
58
60
|
|
|
59
61
|
where `EXTRA_NAME` is one of the following:
|
|
60
62
|
|
|
63
|
+
- `all`: All optional dependencies
|
|
64
|
+
- `api`: REST API Server
|
|
65
|
+
- `cli`: Command Line Interface (CLI)
|
|
66
|
+
- `eval`: Evaluation tools for common metrics like `precision` and `recall`
|
|
67
|
+
- `llm`: Large Language Models (LLM) APIs like Ollama and OpenAI
|
|
61
68
|
- `nlp`: Standalone NLP tools `levenshtein`, `nltk`, and `spacy`
|
|
69
|
+
- `timeseries`: Time series similarity measures like `dtw` and `smith_waterman`
|
|
62
70
|
- `transformers`: Advanced NLP tools based on `pytorch` and `transformers`
|
|
63
|
-
- `cli`: Command Line Interface (CLI)
|
|
64
|
-
- `api`: REST API Server
|
|
65
|
-
- `all`: All of the above
|
|
66
71
|
|
|
67
72
|
## Loading Cases
|
|
68
73
|
|
|
69
74
|
The first step is to load cases and queries.
|
|
70
75
|
We provide predefined functions for the most common formats like CSV, JSON, and XML.
|
|
71
|
-
Additionally, CBRkit also integrates with `pandas` for loading data frames.
|
|
72
|
-
The following example shows how to load cases and queries from a CSV file using `
|
|
76
|
+
Additionally, CBRkit also integrates with `polars` and `pandas` for loading data frames.
|
|
77
|
+
The following example shows how to load cases and queries from a CSV file using `polars`:
|
|
73
78
|
|
|
74
79
|
```python
|
|
75
|
-
import
|
|
80
|
+
import polars as pl
|
|
76
81
|
import cbrkit
|
|
77
82
|
|
|
78
|
-
df =
|
|
79
|
-
casebase = cbrkit.loaders.
|
|
83
|
+
df = pl.read_csv("path/to/cases.csv")
|
|
84
|
+
casebase = cbrkit.loaders.polars(df)
|
|
80
85
|
```
|
|
81
86
|
|
|
82
87
|
When dealing with formats like JSON, the files can be loaded directly:
|
|
@@ -91,17 +96,14 @@ CBRkit expects the type of the queries to match the type of the cases.
|
|
|
91
96
|
You may define a single query directly in Python as follows:
|
|
92
97
|
|
|
93
98
|
```python
|
|
94
|
-
# for pandas
|
|
95
|
-
query = pd.Series({"name": "John", "age": 25})
|
|
96
|
-
# for json
|
|
97
99
|
query = {"name": "John", "age": 25}
|
|
98
100
|
```
|
|
99
101
|
|
|
100
102
|
If you have a collection of queries, you can load them using the same loader functions as for the cases.
|
|
101
103
|
|
|
102
104
|
```python
|
|
103
|
-
# for
|
|
104
|
-
queries = cbrkit.loaders.
|
|
105
|
+
# for polars
|
|
106
|
+
queries = cbrkit.loaders.polars(pl.read_csv("path/to/queries.csv"))
|
|
105
107
|
# for json
|
|
106
108
|
queries = cbrkit.loaders.json("path/to/queries.json")
|
|
107
109
|
```
|
|
@@ -225,8 +227,6 @@ Our result has the following attributes:
|
|
|
225
227
|
- `ranking` A list of case indices sorted by their similarity score.
|
|
226
228
|
- `casebase` The casebase containing only the retrieved cases (useful for downstream tasks).
|
|
227
229
|
|
|
228
|
-
## Combining Multiple Retrieval Pipelines
|
|
229
|
-
|
|
230
230
|
In some cases, it is useful to combine multiple retrieval pipelines, for example when applying the MAC/FAC pattern where a cheap pre-filter is applied to the whole casebase before a more expensive similarity measure is applied on the remaining cases.
|
|
231
231
|
To use this pattern, first create the corresponding retrievers using the builder:
|
|
232
232
|
|
|
@@ -249,6 +249,18 @@ The result has the following two attributes:
|
|
|
249
249
|
Both `final` and each entry in `steps` have the same attributes as discussed previously.
|
|
250
250
|
The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
|
|
251
251
|
|
|
252
|
+
## Adaptation Functions
|
|
253
|
+
|
|
254
|
+
Coming soon...
|
|
255
|
+
|
|
256
|
+
## Reuse
|
|
257
|
+
|
|
258
|
+
Coming soon...
|
|
259
|
+
|
|
260
|
+
## Evaluation
|
|
261
|
+
|
|
262
|
+
Coming soon...
|
|
263
|
+
|
|
252
264
|
## REST API and CLI
|
|
253
265
|
|
|
254
266
|
In order to use the built-in API and CLI, you need to define a retriever in a Python module using the function `cbrkit.retrieval.build()`.
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cbrkit"
|
|
3
|
+
version = "0.20.0"
|
|
4
|
+
description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
|
|
5
|
+
authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
keywords = [
|
|
8
|
+
"cbr",
|
|
9
|
+
"case-based reasoning",
|
|
10
|
+
"api",
|
|
11
|
+
"similarity",
|
|
12
|
+
"nlp",
|
|
13
|
+
"retrieval",
|
|
14
|
+
"cli",
|
|
15
|
+
"tool",
|
|
16
|
+
"library",
|
|
17
|
+
]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 4 - Beta",
|
|
20
|
+
"Environment :: Console",
|
|
21
|
+
"Framework :: Pytest",
|
|
22
|
+
"Intended Audience :: Developers",
|
|
23
|
+
"Intended Audience :: Science/Research",
|
|
24
|
+
"License :: OSI Approved :: MIT License",
|
|
25
|
+
"Natural Language :: English",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Programming Language :: Python :: 3.12",
|
|
28
|
+
"Programming Language :: Python :: 3.13",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
32
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
33
|
+
"Topic :: Utilities",
|
|
34
|
+
"Typing :: Typed",
|
|
35
|
+
]
|
|
36
|
+
requires-python = ">=3.12"
|
|
37
|
+
dependencies = [
|
|
38
|
+
"immutables>=0.21,<1",
|
|
39
|
+
"orjson>=3,<4",
|
|
40
|
+
"polars>=1,<2",
|
|
41
|
+
"pyyaml>=6,<7",
|
|
42
|
+
"xmltodict>=0.13,<1",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
all = [
|
|
47
|
+
"cbrkit[api,cli,eval,graphs,llm,nlp,timeseries,transformers]",
|
|
48
|
+
"numpy>=1,<2; sys_platform == 'darwin' and platform_machine == 'x86_64'",
|
|
49
|
+
"numpy>=2,<3; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
50
|
+
"numpy>=2,<3; sys_platform == 'linux'",
|
|
51
|
+
"pandas>=2,<3",
|
|
52
|
+
"pydantic>=2,<3",
|
|
53
|
+
"scipy>=1,<2",
|
|
54
|
+
]
|
|
55
|
+
api = [
|
|
56
|
+
"cbrkit[cli]",
|
|
57
|
+
"fastapi>=0.100,<1",
|
|
58
|
+
"pydantic-settings>=2,<3",
|
|
59
|
+
"uvicorn[standard]>=0.30,<1",
|
|
60
|
+
]
|
|
61
|
+
cli = ["rich>=13,<14", "typer>=0.9,<1"]
|
|
62
|
+
eval = ["ranx>=0.3,<1"]
|
|
63
|
+
graphs = ["networkx>=3,<4", "rustworkx>=0.15,<1"]
|
|
64
|
+
llm = ["cohere>=5,<6", "ollama>=0.3,<1", "openai>=1,<2"]
|
|
65
|
+
nlp = [
|
|
66
|
+
"levenshtein>=0.23,<0.26; sys_platform == 'darwin' and platform_machine == 'x86_64'",
|
|
67
|
+
"levenshtein>=0.26,<1; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
68
|
+
"levenshtein>=0.26,<1; sys_platform == 'linux'",
|
|
69
|
+
"nltk>=3,<4",
|
|
70
|
+
"spacy>=3.7,<3.8; sys_platform == 'darwin' and platform_machine == 'x86_64'",
|
|
71
|
+
"spacy>=3.8,<4; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
72
|
+
"spacy>=3.8,<4; sys_platform == 'linux'",
|
|
73
|
+
]
|
|
74
|
+
timeseries = ["minineedle>=3,<4", "dtaidistance>=2,<3"]
|
|
75
|
+
transformers = [
|
|
76
|
+
"sentence-transformers>=3,<4",
|
|
77
|
+
"torch>=2.2,<2.3; sys_platform == 'darwin' and platform_machine == 'x86_64'",
|
|
78
|
+
"torch>=2.5,<3; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
79
|
+
"torch>=2.5,<3; sys_platform == 'linux'",
|
|
80
|
+
"transformers>=4,<5",
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
[project.urls]
|
|
84
|
+
Repository = "https://github.com/wi2trier/cbrkit"
|
|
85
|
+
Documentation = "https://wi2trier.github.io/cbrkit/"
|
|
86
|
+
Issues = "https://github.com/wi2trier/cbrkit/issues"
|
|
87
|
+
Changelog = "https://github.com/wi2trier/cbrkit/releases"
|
|
88
|
+
|
|
89
|
+
[project.scripts]
|
|
90
|
+
cbrkit = "cbrkit.cli:app"
|
|
91
|
+
|
|
92
|
+
[dependency-groups]
|
|
93
|
+
test = ["pytest>=8,<9", "pytest-cov>=6,<7"]
|
|
94
|
+
docs = ["pdoc>=15,<16"]
|
|
95
|
+
|
|
96
|
+
[build-system]
|
|
97
|
+
requires = ["setuptools>=61"]
|
|
98
|
+
build-backend = "setuptools.build_meta"
|
|
99
|
+
|
|
100
|
+
[tool.pytest.ini_options]
|
|
101
|
+
addopts = "--cov cbrkit --cov-report term-missing --doctest-modules --ignore data --ignore examples --ignore result"
|
|
102
|
+
doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS"
|
|
103
|
+
|
|
104
|
+
[tool.uv]
|
|
105
|
+
default-groups = ["test", "docs"]
|
|
106
|
+
|
|
107
|
+
[tool.ruff]
|
|
108
|
+
target-version = "py312"
|
|
109
|
+
|
|
110
|
+
[tool.ruff.lint.pydocstyle]
|
|
111
|
+
convention = "google"
|
cbrkit-0.20.0/setup.cfg
ADDED
|
@@ -6,10 +6,12 @@
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
from . import (
|
|
9
|
+
adapt,
|
|
9
10
|
eval,
|
|
10
11
|
helpers,
|
|
11
12
|
loaders,
|
|
12
13
|
retrieval,
|
|
14
|
+
reuse,
|
|
13
15
|
sim,
|
|
14
16
|
typing,
|
|
15
17
|
)
|
|
@@ -18,6 +20,8 @@ __all__ = [
|
|
|
18
20
|
"loaders",
|
|
19
21
|
"sim",
|
|
20
22
|
"retrieval",
|
|
23
|
+
"adapt",
|
|
24
|
+
"reuse",
|
|
21
25
|
"eval",
|
|
22
26
|
"typing",
|
|
23
27
|
"helpers",
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CBRkit contains a selection of adaptation functions for different data types.
|
|
3
|
+
Besides functions for standard data types like
|
|
4
|
+
numbers (`cbrkit.adapt.numbers`),
|
|
5
|
+
strings (`cbrkit.adapt.strings`),
|
|
6
|
+
and generic data (`cbrkit.adapt.generic`),
|
|
7
|
+
there is also a function for attribute-value data.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from . import generic, numbers, strings
|
|
11
|
+
from ._attribute_value import attribute_value
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"generic",
|
|
15
|
+
"strings",
|
|
16
|
+
"numbers",
|
|
17
|
+
"attribute_value",
|
|
18
|
+
]
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, override
|
|
4
|
+
|
|
5
|
+
from ..helpers import get_metadata
|
|
6
|
+
from ..typing import (
|
|
7
|
+
AdaptPairFunc,
|
|
8
|
+
JsonDict,
|
|
9
|
+
SupportsMetadata,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = ["attribute_value"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def default_value_getter(obj: Any, key: Any) -> Any:
    """Read `key` from `obj` by subscription if supported, else by attribute.

    Args:
        obj: Object to read from.
        key: Item key or attribute name to look up.

    Returns:
        `obj[key]` when `obj` defines `__getitem__`, otherwise `getattr(obj, key)`.
    """
    supports_items = hasattr(obj, "__getitem__")
    return obj[key] if supports_items else getattr(obj, key)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def default_value_setter(obj: Any, key: Any, value: Any) -> None:
    """Write `value` under `key` on `obj` by item assignment if supported, else by attribute.

    Args:
        obj: Object to modify.
        key: Item key or attribute name to assign.
        value: Value to store.
    """
    if not hasattr(obj, "__setitem__"):
        # No item assignment available: fall back to a plain attribute.
        setattr(obj, key, value)
        return

    obj[key] = value
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(slots=True, frozen=True)
|
|
30
|
+
class attribute_value[V](AdaptPairFunc[V], SupportsMetadata):
|
|
31
|
+
"""Adapt values of attributes using specified adaptation functions.
|
|
32
|
+
|
|
33
|
+
This class allows for the adaptation of multiple attributes of a case by applying
|
|
34
|
+
one or more adaptation functions to each attribute. It supports different data structures
|
|
35
|
+
like mappings (dictionaries) and dataframes
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
attributes: A mapping of attribute names to either single adaptation functions or
|
|
39
|
+
sequences of adaptation functions that will be applied in order.
|
|
40
|
+
value_getter: Function to retrieve values from objects. Defaults to dictionary/attribute access.
|
|
41
|
+
value_setter: Function to set values on objects. Defaults to dictionary/attribute assignment.
|
|
42
|
+
|
|
43
|
+
Returns:
|
|
44
|
+
A new case with adapted attribute values.
|
|
45
|
+
|
|
46
|
+
Examples:
|
|
47
|
+
>>> func = attribute_value({
|
|
48
|
+
... "name": lambda x, y: x if x == y else y,
|
|
49
|
+
... "age": lambda x, y: x if x > y else y,
|
|
50
|
+
... })
|
|
51
|
+
>>> result = func(
|
|
52
|
+
... {"name": "Alice", "age": 30},
|
|
53
|
+
... {"name": "Peter", "age": 25}
|
|
54
|
+
... )
|
|
55
|
+
>>> result
|
|
56
|
+
{'name': 'Peter', 'age': 30}
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
attributes: Mapping[str, AdaptPairFunc[Any] | Sequence[AdaptPairFunc[Any]]]
|
|
60
|
+
value_getter: Callable[[Any, str], Any] = default_value_getter
|
|
61
|
+
value_setter: Callable[[Any, str, Any], None] = default_value_setter
|
|
62
|
+
|
|
63
|
+
@property
|
|
64
|
+
@override
|
|
65
|
+
def metadata(self) -> JsonDict:
|
|
66
|
+
return {
|
|
67
|
+
"attributes": {
|
|
68
|
+
key: get_metadata(value) for key, value in self.attributes.items()
|
|
69
|
+
},
|
|
70
|
+
"value_getter": get_metadata(self.value_getter),
|
|
71
|
+
"value_setter": get_metadata(self.value_setter),
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
@override
|
|
75
|
+
def __call__(self, case: V, query: V) -> V:
|
|
76
|
+
for attr_name in self.attributes:
|
|
77
|
+
adapt_funcs = self.attributes[attr_name]
|
|
78
|
+
|
|
79
|
+
if not isinstance(adapt_funcs, Sequence):
|
|
80
|
+
adapt_funcs = [adapt_funcs]
|
|
81
|
+
|
|
82
|
+
case_attr_value = self.value_getter(case, attr_name)
|
|
83
|
+
query_attr_value = self.value_getter(query, attr_name)
|
|
84
|
+
|
|
85
|
+
for adapt_func in adapt_funcs:
|
|
86
|
+
case_attr_value = adapt_func(case_attr_value, query_attr_value)
|
|
87
|
+
|
|
88
|
+
self.value_setter(case, attr_name, case_attr_value)
|
|
89
|
+
|
|
90
|
+
return case
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Literal, override
|
|
4
|
+
|
|
5
|
+
from ..helpers import get_metadata
|
|
6
|
+
from ..typing import AdaptPairFunc, JsonDict, SupportsMetadata
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"pipe",
|
|
10
|
+
"null",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True, frozen=True)
|
|
15
|
+
class pipe[V](AdaptPairFunc[V], SupportsMetadata):
|
|
16
|
+
"""Chain multiple adaptation functions together.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
functions: List of adaptation functions to apply in order.
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
The adapted value.
|
|
23
|
+
|
|
24
|
+
Examples:
|
|
25
|
+
>>> func = pipe([
|
|
26
|
+
... lambda x, y: x + y,
|
|
27
|
+
... lambda x, y: x * y,
|
|
28
|
+
... ])
|
|
29
|
+
>>> func(2, 3)
|
|
30
|
+
15
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
functions: list[AdaptPairFunc[V]]
|
|
34
|
+
|
|
35
|
+
@property
|
|
36
|
+
@override
|
|
37
|
+
def metadata(self) -> JsonDict:
|
|
38
|
+
return {
|
|
39
|
+
"functions": [get_metadata(func) for func in self.functions],
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
@override
|
|
43
|
+
def __call__(self, case: V, query: V) -> V:
|
|
44
|
+
current_case = case
|
|
45
|
+
|
|
46
|
+
for func in self.functions:
|
|
47
|
+
current_case = func(current_case, query)
|
|
48
|
+
|
|
49
|
+
return current_case
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass(slots=True, frozen=True)
|
|
53
|
+
class null[V](AdaptPairFunc[V], SupportsMetadata):
|
|
54
|
+
"""Perform a null adaptation and return the original case or query value.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
select: Either "case" or "query".
|
|
58
|
+
copy: Whether to copy the value before returning it.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
The original case value.
|
|
62
|
+
|
|
63
|
+
Examples:
|
|
64
|
+
>>> func = null()
|
|
65
|
+
>>> func(2, 3)
|
|
66
|
+
2
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
target: Literal["case", "query"] = "case"
|
|
70
|
+
copy: bool = False
|
|
71
|
+
|
|
72
|
+
@override
|
|
73
|
+
def __call__(self, case: V, query: V) -> V:
|
|
74
|
+
value: V
|
|
75
|
+
|
|
76
|
+
if self.target == "case":
|
|
77
|
+
value = case
|
|
78
|
+
elif self.target == "query":
|
|
79
|
+
value = query
|
|
80
|
+
else:
|
|
81
|
+
raise ValueError(f"Invalid target value: {self.target}")
|
|
82
|
+
|
|
83
|
+
if self.copy:
|
|
84
|
+
value = deepcopy(value)
|
|
85
|
+
|
|
86
|
+
return value
|