cbrkit 0.19.2__tar.gz → 0.20.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {cbrkit-0.19.2 → cbrkit-0.20.0}/PKG-INFO +77 -58
  2. {cbrkit-0.19.2 → cbrkit-0.20.0}/README.md +27 -15
  3. cbrkit-0.20.0/pyproject.toml +111 -0
  4. cbrkit-0.20.0/setup.cfg +4 -0
  5. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/__init__.py +4 -0
  6. cbrkit-0.20.0/src/cbrkit/__main__.py +3 -0
  7. cbrkit-0.20.0/src/cbrkit/adapt/__init__.py +18 -0
  8. cbrkit-0.20.0/src/cbrkit/adapt/_attribute_value.py +90 -0
  9. cbrkit-0.20.0/src/cbrkit/adapt/generic.py +86 -0
  10. cbrkit-0.20.0/src/cbrkit/adapt/numbers.py +52 -0
  11. cbrkit-0.20.0/src/cbrkit/adapt/strings.py +103 -0
  12. cbrkit-0.20.0/src/cbrkit/api.py +127 -0
  13. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/cli.py +34 -12
  14. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/_common.py +2 -2
  15. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/_retrieval.py +2 -3
  16. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/helpers.py +87 -5
  17. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/loaders.py +71 -103
  18. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/retrieval.py +16 -65
  19. cbrkit-0.20.0/src/cbrkit/reuse.py +345 -0
  20. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/__init__.py +1 -2
  21. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/_aggregator.py +6 -5
  22. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/_attribute_value.py +15 -26
  23. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/collections.py +2 -2
  24. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/generic.py +2 -2
  25. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_isomorphism.py +8 -7
  26. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/numbers.py +2 -2
  27. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/strings/__init__.py +7 -10
  28. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/strings/taxonomy.py +3 -3
  29. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/typing.py +36 -9
  30. cbrkit-0.20.0/src/cbrkit.egg-info/PKG-INFO +390 -0
  31. cbrkit-0.20.0/src/cbrkit.egg-info/SOURCES.txt +42 -0
  32. cbrkit-0.20.0/src/cbrkit.egg-info/dependency_links.txt +1 -0
  33. cbrkit-0.20.0/src/cbrkit.egg-info/entry_points.txt +2 -0
  34. cbrkit-0.20.0/src/cbrkit.egg-info/requires.txt +74 -0
  35. cbrkit-0.20.0/src/cbrkit.egg-info/top_level.txt +1 -0
  36. cbrkit-0.20.0/tests/test_cycle.py +51 -0
  37. cbrkit-0.20.0/tests/test_retrieve.py +159 -0
  38. cbrkit-0.20.0/tests/test_reuse.py +158 -0
  39. cbrkit-0.19.2/cbrkit/__main__.py +0 -3
  40. cbrkit-0.19.2/cbrkit/adaptation.py +0 -17
  41. cbrkit-0.19.2/cbrkit/api.py +0 -74
  42. cbrkit-0.19.2/pyproject.toml +0 -130
  43. {cbrkit-0.19.2 → cbrkit-0.20.0}/LICENSE +0 -0
  44. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/eval/__init__.py +0 -0
  45. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/py.typed +0 -0
  46. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/__init__.py +0 -0
  47. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_astar.py +0 -0
  48. {cbrkit-0.19.2 → cbrkit-0.20.0/src}/cbrkit/sim/graphs/_model.py +0 -0
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cbrkit
3
- Version: 0.19.2
4
- Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
5
- Home-page: https://wi2trier.github.io/cbrkit/
6
- License: MIT
3
+ Version: 0.20.0
4
+ Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI
5
+ Author-email: Mirko Lenz <mirko@mirkolenz.com>
6
+ Project-URL: Repository, https://github.com/wi2trier/cbrkit
7
+ Project-URL: Documentation, https://wi2trier.github.io/cbrkit/
8
+ Project-URL: Issues, https://github.com/wi2trier/cbrkit/issues
9
+ Project-URL: Changelog, https://github.com/wi2trier/cbrkit/releases
7
10
  Keywords: cbr,case-based reasoning,api,similarity,nlp,retrieval,cli,tool,library
8
- Author: Mirko Lenz
9
- Author-email: mirko@mirkolenz.com
10
- Requires-Python: >=3.12,<4.0
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Environment :: Console
13
13
  Classifier: Framework :: Pytest
@@ -16,56 +16,64 @@ Classifier: Intended Audience :: Science/Research
16
16
  Classifier: License :: OSI Approved :: MIT License
17
17
  Classifier: Natural Language :: English
18
18
  Classifier: Operating System :: OS Independent
19
- Classifier: Programming Language :: Python :: 3
20
19
  Classifier: Programming Language :: Python :: 3.12
21
20
  Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3
22
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
24
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
25
  Classifier: Topic :: Utilities
26
26
  Classifier: Typing :: Typed
27
+ Requires-Python: >=3.12
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: immutables<1,>=0.21
31
+ Requires-Dist: orjson<4,>=3
32
+ Requires-Dist: polars<2,>=1
33
+ Requires-Dist: pyyaml<7,>=6
34
+ Requires-Dist: xmltodict<1,>=0.13
27
35
  Provides-Extra: all
36
+ Requires-Dist: cbrkit[api,cli,eval,graphs,llm,nlp,timeseries,transformers]; extra == "all"
37
+ Requires-Dist: numpy<2,>=1; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "all"
38
+ Requires-Dist: numpy<3,>=2; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "all"
39
+ Requires-Dist: numpy<3,>=2; sys_platform == "linux" and extra == "all"
40
+ Requires-Dist: pandas<3,>=2; extra == "all"
41
+ Requires-Dist: pydantic<3,>=2; extra == "all"
42
+ Requires-Dist: scipy<2,>=1; extra == "all"
28
43
  Provides-Extra: api
44
+ Requires-Dist: cbrkit[cli]; extra == "api"
45
+ Requires-Dist: fastapi<1,>=0.100; extra == "api"
46
+ Requires-Dist: pydantic-settings<3,>=2; extra == "api"
47
+ Requires-Dist: uvicorn[standard]<1,>=0.30; extra == "api"
29
48
  Provides-Extra: cli
49
+ Requires-Dist: rich<14,>=13; extra == "cli"
50
+ Requires-Dist: typer<1,>=0.9; extra == "cli"
30
51
  Provides-Extra: eval
52
+ Requires-Dist: ranx<1,>=0.3; extra == "eval"
53
+ Provides-Extra: graphs
54
+ Requires-Dist: networkx<4,>=3; extra == "graphs"
55
+ Requires-Dist: rustworkx<1,>=0.15; extra == "graphs"
31
56
  Provides-Extra: llm
57
+ Requires-Dist: cohere<6,>=5; extra == "llm"
58
+ Requires-Dist: ollama<1,>=0.3; extra == "llm"
59
+ Requires-Dist: openai<2,>=1; extra == "llm"
32
60
  Provides-Extra: nlp
61
+ Requires-Dist: levenshtein<0.26,>=0.23; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "nlp"
62
+ Requires-Dist: levenshtein<1,>=0.26; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "nlp"
63
+ Requires-Dist: levenshtein<1,>=0.26; sys_platform == "linux" and extra == "nlp"
64
+ Requires-Dist: nltk<4,>=3; extra == "nlp"
65
+ Requires-Dist: spacy<3.8,>=3.7; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "nlp"
66
+ Requires-Dist: spacy<4,>=3.8; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "nlp"
67
+ Requires-Dist: spacy<4,>=3.8; sys_platform == "linux" and extra == "nlp"
33
68
  Provides-Extra: timeseries
69
+ Requires-Dist: minineedle<4,>=3; extra == "timeseries"
70
+ Requires-Dist: dtaidistance<3,>=2; extra == "timeseries"
34
71
  Provides-Extra: transformers
35
- Requires-Dist: cohere (>=5,<6) ; extra == "all" or extra == "llm"
36
- Requires-Dist: dtaidistance (>=2.3,<3.0) ; extra == "all" or extra == "timeseries"
37
- Requires-Dist: fastapi[all] (>=0.100,<1.0) ; extra == "all" or extra == "api"
38
- Requires-Dist: immutables (>=0.21,<1.0)
39
- Requires-Dist: levenshtein (>=0.23,<0.26) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "all" or extra == "nlp")
40
- Requires-Dist: levenshtein (>=0.23,<1) ; (sys_platform == "darwin" and platform_machine == "arm64") and (extra == "all" or extra == "nlp")
41
- Requires-Dist: levenshtein (>=0.23,<1) ; (sys_platform == "linux") and (extra == "all" or extra == "nlp")
42
- Requires-Dist: minineedle (>=3.1,<4.0) ; extra == "all" or extra == "timeseries"
43
- Requires-Dist: networkx (>=3.0,<4.0) ; extra == "all"
44
- Requires-Dist: nltk (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
45
- Requires-Dist: numpy (>=1.26,<3.0) ; extra == "all"
46
- Requires-Dist: ollama (>=0.3,<1) ; extra == "all" or extra == "llm"
47
- Requires-Dist: openai (>=1.50,<2.0) ; extra == "all" or extra == "llm"
48
- Requires-Dist: orjson (>=3.9,<4.0)
49
- Requires-Dist: pandas (>=2.1,<3.0)
50
- Requires-Dist: polars (>=1.5,<2.0) ; extra == "all"
51
- Requires-Dist: pyarrow (>=13.0)
52
- Requires-Dist: pydantic (>=2.0,<3.0)
53
- Requires-Dist: pyyaml (>=6.0,<7.0)
54
- Requires-Dist: ranx (>=0.3,<1.0) ; extra == "all" or extra == "eval"
55
- Requires-Dist: rich (>=13.7,<14.0) ; extra == "all" or extra == "api" or extra == "cli"
56
- Requires-Dist: rustworkx (>=0.15,<1.0)
57
- Requires-Dist: scipy (>=1.12,<2.0) ; extra == "all"
58
- Requires-Dist: sentence-transformers (>=2.2,<4.0) ; extra == "all" or extra == "transformers"
59
- Requires-Dist: spacy (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
60
- Requires-Dist: torch (>=2.2,<2.3) ; (sys_platform == "darwin" and platform_machine == "x86_64") and (extra == "all" or extra == "transformers")
61
- Requires-Dist: torch (>=2.2,<3.0) ; (sys_platform == "darwin" and platform_machine == "arm64") and (extra == "all" or extra == "transformers")
62
- Requires-Dist: torch (>=2.2,<3.0) ; (sys_platform == "linux") and (extra == "all" or extra == "transformers")
63
- Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
64
- Requires-Dist: typer (>=0.9,<1.0) ; extra == "all" or extra == "api" or extra == "cli"
65
- Requires-Dist: uvicorn[standard] (>=0.24,<1.0) ; extra == "all" or extra == "api"
66
- Requires-Dist: xmltodict (>=0.13,<1.0)
67
- Project-URL: Repository, https://github.com/wi2trier/cbrkit
68
- Description-Content-Type: text/markdown
72
+ Requires-Dist: sentence-transformers<4,>=3; extra == "transformers"
73
+ Requires-Dist: torch<2.3,>=2.2; (sys_platform == "darwin" and platform_machine == "x86_64") and extra == "transformers"
74
+ Requires-Dist: torch<3,>=2.5; (sys_platform == "darwin" and platform_machine == "arm64") and extra == "transformers"
75
+ Requires-Dist: torch<3,>=2.5; sys_platform == "linux" and extra == "transformers"
76
+ Requires-Dist: transformers<5,>=4; extra == "transformers"
69
77
 
70
78
  <!-- markdownlint-disable MD033 MD041 -->
71
79
  <h2><p align="center">CBRkit</p></h2>
@@ -109,6 +117,8 @@ The following modules are part of CBRkit:
109
117
  - `cbrkit.loaders`: Functions for loading cases and queries.
110
118
  - `cbrkit.sim`: Similarity generator functions for common data types like strings and numbers.
111
119
  - `cbrkit.retrieval`: Functions for defining and applying retrieval pipelines.
120
+ - `cbrkit.adapt`: Adaptation generator functions for adapting cases based on a query.
121
+ - `cbrkit.reuse`: Functions for defining and applying reuse pipelines.
112
122
  - `cbrkit.typing`: Generic type definitions for defining custom functions.
113
123
 
114
124
  ## Installation
@@ -127,25 +137,28 @@ pip install cbrkit[EXTRA_NAME,...]
127
137
 
128
138
  where `EXTRA_NAME` is one of the following:
129
139
 
140
+ - `all`: All optional dependencies
141
+ - `api`: REST API Server
142
+ - `cli`: Command Line Interface (CLI)
143
+ - `eval`: Evaluation tools for common metrics like `precision` and `recall`
144
+ - `llm`: Large Language Models (LLM) APIs like Ollama and OpenAI
130
145
  - `nlp`: Standalone NLP tools `levenshtein`, `nltk`, and `spacy`
146
+ - `timeseries`: Time series similarity measures like `dtw` and `smith_waterman`
131
147
  - `transformers`: Advanced NLP tools based on `pytorch` and `transformers`
132
- - `cli`: Command Line Interface (CLI)
133
- - `api`: REST API Server
134
- - `all`: All of the above
135
148
 
136
149
  ## Loading Cases
137
150
 
138
151
  The first step is to load cases and queries.
139
152
  We provide predefined functions for the most common formats like CSV, JSON, and XML.
140
- Additionally, CBRkit also integrates with `pandas` for loading data frames.
141
- The following example shows how to load cases and queries from a CSV file using `pandas`:
153
+ Additionally, CBRkit also integrates with `polars` and `pandas` for loading data frames.
154
+ The following example shows how to load cases and queries from a CSV file using `polars`:
142
155
 
143
156
  ```python
144
- import pandas as pd
157
+ import polars as pl
145
158
  import cbrkit
146
159
 
147
- df = pd.read_csv("path/to/cases.csv")
148
- casebase = cbrkit.loaders.pandas(df)
160
+ df = pl.read_csv("path/to/cases.csv")
161
+ casebase = cbrkit.loaders.polars(df)
149
162
  ```
150
163
 
151
164
  When dealing with formats like JSON, the files can be loaded directly:
@@ -160,17 +173,14 @@ CBRkit expects the type of the queries to match the type of the cases.
160
173
  You may define a single query directly in Python as follows:
161
174
 
162
175
  ```python
163
- # for pandas
164
- query = pd.Series({"name": "John", "age": 25})
165
- # for json
166
176
  query = {"name": "John", "age": 25}
167
177
  ```
168
178
 
169
179
  If you have a collection of queries, you can load them using the same loader functions as for the cases.
170
180
 
171
181
  ```python
172
- # for pandas
173
- queries = cbrkit.loaders.pandas(pd.read_csv("path/to/queries.csv"))
182
+ # for polars
183
+ queries = cbrkit.loaders.polars(pl.read_csv("path/to/queries.csv"))
174
184
  # for json
175
185
  queries = cbrkit.loaders.json("path/to/queries.json")
176
186
  ```
@@ -294,8 +304,6 @@ Our result has the following attributes:
294
304
  - `ranking` A list of case indices sorted by their similarity score.
295
305
  - `casebase` The casebase containing only the retrieved cases (useful for downstream tasks).
296
306
 
297
- ## Combining Multiple Retrieval Pipelines
298
-
299
307
  In some cases, it is useful to combine multiple retrieval pipelines, for example when applying the MAC/FAC pattern where a cheap pre-filter is applied to the whole casebase before a more expensive similarity measure is applied on the remaining cases.
300
308
  To use this pattern, first create the corresponding retrievers using the builder:
301
309
 
@@ -318,6 +326,18 @@ The result has the following two attributes:
318
326
  Both `final` and each entry in `steps` have the same attributes as discussed previously.
319
327
  The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
320
328
 
329
+ ## Adaptation Functions
330
+
331
+ Coming soon...
332
+
333
+ ## Reuse
334
+
335
+ Coming soon...
336
+
337
+ ## Evaluation
338
+
339
+ Coming soon...
340
+
321
341
  ## REST API and CLI
322
342
 
323
343
  In order to use the built-in API and CLI, you need to define a retriever in a Python module using the function `cbrkit.retrieval.build()`.
@@ -368,4 +388,3 @@ It offers a single endpoint `/retrieve` that accepts POST requests with a JSON b
368
388
  ```
369
389
 
370
390
  The server will return a JSON object containing the retrieval results for each query.
371
-
@@ -40,6 +40,8 @@ The following modules are part of CBRkit:
40
40
  - `cbrkit.loaders`: Functions for loading cases and queries.
41
41
  - `cbrkit.sim`: Similarity generator functions for common data types like strings and numbers.
42
42
  - `cbrkit.retrieval`: Functions for defining and applying retrieval pipelines.
43
+ - `cbrkit.adapt`: Adaptation generator functions for adapting cases based on a query.
44
+ - `cbrkit.reuse`: Functions for defining and applying reuse pipelines.
43
45
  - `cbrkit.typing`: Generic type definitions for defining custom functions.
44
46
 
45
47
  ## Installation
@@ -58,25 +60,28 @@ pip install cbrkit[EXTRA_NAME,...]
58
60
 
59
61
  where `EXTRA_NAME` is one of the following:
60
62
 
63
+ - `all`: All optional dependencies
64
+ - `api`: REST API Server
65
+ - `cli`: Command Line Interface (CLI)
66
+ - `eval`: Evaluation tools for common metrics like `precision` and `recall`
67
+ - `llm`: Large Language Models (LLM) APIs like Ollama and OpenAI
61
68
  - `nlp`: Standalone NLP tools `levenshtein`, `nltk`, and `spacy`
69
+ - `timeseries`: Time series similarity measures like `dtw` and `smith_waterman`
62
70
  - `transformers`: Advanced NLP tools based on `pytorch` and `transformers`
63
- - `cli`: Command Line Interface (CLI)
64
- - `api`: REST API Server
65
- - `all`: All of the above
66
71
 
67
72
  ## Loading Cases
68
73
 
69
74
  The first step is to load cases and queries.
70
75
  We provide predefined functions for the most common formats like CSV, JSON, and XML.
71
- Additionally, CBRkit also integrates with `pandas` for loading data frames.
72
- The following example shows how to load cases and queries from a CSV file using `pandas`:
76
+ Additionally, CBRkit also integrates with `polars` and `pandas` for loading data frames.
77
+ The following example shows how to load cases and queries from a CSV file using `polars`:
73
78
 
74
79
  ```python
75
- import pandas as pd
80
+ import polars as pl
76
81
  import cbrkit
77
82
 
78
- df = pd.read_csv("path/to/cases.csv")
79
- casebase = cbrkit.loaders.pandas(df)
83
+ df = pl.read_csv("path/to/cases.csv")
84
+ casebase = cbrkit.loaders.polars(df)
80
85
  ```
81
86
 
82
87
  When dealing with formats like JSON, the files can be loaded directly:
@@ -91,17 +96,14 @@ CBRkit expects the type of the queries to match the type of the cases.
91
96
  You may define a single query directly in Python as follows:
92
97
 
93
98
  ```python
94
- # for pandas
95
- query = pd.Series({"name": "John", "age": 25})
96
- # for json
97
99
  query = {"name": "John", "age": 25}
98
100
  ```
99
101
 
100
102
  If you have a collection of queries, you can load them using the same loader functions as for the cases.
101
103
 
102
104
  ```python
103
- # for pandas
104
- queries = cbrkit.loaders.pandas(pd.read_csv("path/to/queries.csv"))
105
+ # for polars
106
+ queries = cbrkit.loaders.polars(pl.read_csv("path/to/queries.csv"))
105
107
  # for json
106
108
  queries = cbrkit.loaders.json("path/to/queries.json")
107
109
  ```
@@ -225,8 +227,6 @@ Our result has the following attributes:
225
227
  - `ranking` A list of case indices sorted by their similarity score.
226
228
  - `casebase` The casebase containing only the retrieved cases (useful for downstream tasks).
227
229
 
228
- ## Combining Multiple Retrieval Pipelines
229
-
230
230
  In some cases, it is useful to combine multiple retrieval pipelines, for example when applying the MAC/FAC pattern where a cheap pre-filter is applied to the whole casebase before a more expensive similarity measure is applied on the remaining cases.
231
231
  To use this pattern, first create the corresponding retrievers using the builder:
232
232
 
@@ -249,6 +249,18 @@ The result has the following two attributes:
249
249
  Both `final` and each entry in `steps` have the same attributes as discussed previously.
250
250
  The returned result also has these entries which are an alias for the corresponding entries in `final` (i.e., `result.ranking == result.final.ranking`).
251
251
 
252
+ ## Adaptation Functions
253
+
254
+ Coming soon...
255
+
256
+ ## Reuse
257
+
258
+ Coming soon...
259
+
260
+ ## Evaluation
261
+
262
+ Coming soon...
263
+
252
264
  ## REST API and CLI
253
265
 
254
266
  In order to use the built-in API and CLI, you need to define a retriever in a Python module using the function `cbrkit.retrieval.build()`.
@@ -0,0 +1,111 @@
1
+ [project]
2
+ name = "cbrkit"
3
+ version = "0.20.0"
4
+ description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI"
5
+ authors = [{ name = "Mirko Lenz", email = "mirko@mirkolenz.com" }]
6
+ readme = "README.md"
7
+ keywords = [
8
+ "cbr",
9
+ "case-based reasoning",
10
+ "api",
11
+ "similarity",
12
+ "nlp",
13
+ "retrieval",
14
+ "cli",
15
+ "tool",
16
+ "library",
17
+ ]
18
+ classifiers = [
19
+ "Development Status :: 4 - Beta",
20
+ "Environment :: Console",
21
+ "Framework :: Pytest",
22
+ "Intended Audience :: Developers",
23
+ "Intended Audience :: Science/Research",
24
+ "License :: OSI Approved :: MIT License",
25
+ "Natural Language :: English",
26
+ "Operating System :: OS Independent",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Programming Language :: Python :: 3.13",
29
+ "Programming Language :: Python :: 3",
30
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
31
+ "Topic :: Scientific/Engineering :: Information Analysis",
32
+ "Topic :: Software Development :: Libraries :: Python Modules",
33
+ "Topic :: Utilities",
34
+ "Typing :: Typed",
35
+ ]
36
+ requires-python = ">=3.12"
37
+ dependencies = [
38
+ "immutables>=0.21,<1",
39
+ "orjson>=3,<4",
40
+ "polars>=1,<2",
41
+ "pyyaml>=6,<7",
42
+ "xmltodict>=0.13,<1",
43
+ ]
44
+
45
+ [project.optional-dependencies]
46
+ all = [
47
+ "cbrkit[api,cli,eval,graphs,llm,nlp,timeseries,transformers]",
48
+ "numpy>=1,<2; sys_platform == 'darwin' and platform_machine == 'x86_64'",
49
+ "numpy>=2,<3; sys_platform == 'darwin' and platform_machine == 'arm64'",
50
+ "numpy>=2,<3; sys_platform == 'linux'",
51
+ "pandas>=2,<3",
52
+ "pydantic>=2,<3",
53
+ "scipy>=1,<2",
54
+ ]
55
+ api = [
56
+ "cbrkit[cli]",
57
+ "fastapi>=0.100,<1",
58
+ "pydantic-settings>=2,<3",
59
+ "uvicorn[standard]>=0.30,<1",
60
+ ]
61
+ cli = ["rich>=13,<14", "typer>=0.9,<1"]
62
+ eval = ["ranx>=0.3,<1"]
63
+ graphs = ["networkx>=3,<4", "rustworkx>=0.15,<1"]
64
+ llm = ["cohere>=5,<6", "ollama>=0.3,<1", "openai>=1,<2"]
65
+ nlp = [
66
+ "levenshtein>=0.23,<0.26; sys_platform == 'darwin' and platform_machine == 'x86_64'",
67
+ "levenshtein>=0.26,<1; sys_platform == 'darwin' and platform_machine == 'arm64'",
68
+ "levenshtein>=0.26,<1; sys_platform == 'linux'",
69
+ "nltk>=3,<4",
70
+ "spacy>=3.7,<3.8; sys_platform == 'darwin' and platform_machine == 'x86_64'",
71
+ "spacy>=3.8,<4; sys_platform == 'darwin' and platform_machine == 'arm64'",
72
+ "spacy>=3.8,<4; sys_platform == 'linux'",
73
+ ]
74
+ timeseries = ["minineedle>=3,<4", "dtaidistance>=2,<3"]
75
+ transformers = [
76
+ "sentence-transformers>=3,<4",
77
+ "torch>=2.2,<2.3; sys_platform == 'darwin' and platform_machine == 'x86_64'",
78
+ "torch>=2.5,<3; sys_platform == 'darwin' and platform_machine == 'arm64'",
79
+ "torch>=2.5,<3; sys_platform == 'linux'",
80
+ "transformers>=4,<5",
81
+ ]
82
+
83
+ [project.urls]
84
+ Repository = "https://github.com/wi2trier/cbrkit"
85
+ Documentation = "https://wi2trier.github.io/cbrkit/"
86
+ Issues = "https://github.com/wi2trier/cbrkit/issues"
87
+ Changelog = "https://github.com/wi2trier/cbrkit/releases"
88
+
89
+ [project.scripts]
90
+ cbrkit = "cbrkit.cli:app"
91
+
92
+ [dependency-groups]
93
+ test = ["pytest>=8,<9", "pytest-cov>=6,<7"]
94
+ docs = ["pdoc>=15,<16"]
95
+
96
+ [build-system]
97
+ requires = ["setuptools>=61"]
98
+ build-backend = "setuptools.build_meta"
99
+
100
+ [tool.pytest.ini_options]
101
+ addopts = "--cov cbrkit --cov-report term-missing --doctest-modules --ignore data --ignore examples --ignore result"
102
+ doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS"
103
+
104
+ [tool.uv]
105
+ default-groups = ["test", "docs"]
106
+
107
+ [tool.ruff]
108
+ target-version = "py312"
109
+
110
+ [tool.ruff.lint.pydocstyle]
111
+ convention = "google"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -6,10 +6,12 @@
6
6
  """
7
7
 
8
8
  from . import (
9
+ adapt,
9
10
  eval,
10
11
  helpers,
11
12
  loaders,
12
13
  retrieval,
14
+ reuse,
13
15
  sim,
14
16
  typing,
15
17
  )
@@ -18,6 +20,8 @@ __all__ = [
18
20
  "loaders",
19
21
  "sim",
20
22
  "retrieval",
23
+ "adapt",
24
+ "reuse",
21
25
  "eval",
22
26
  "typing",
23
27
  "helpers",
@@ -0,0 +1,3 @@
1
+ from .cli import app
2
+
3
+ app()
@@ -0,0 +1,18 @@
1
+ """
2
+ CBRkit contains a selection of adaptation functions for different data types.
3
+ Besides functions for standard data types like
4
+ numbers (`cbrkit.adapt.numbers`),
5
+ strings (`cbrkit.adapt.strings`),
6
+ and generic data (`cbrkit.adapt.generic`),
7
+ there is also a function for attribute-value data.
8
+ """
9
+
10
+ from . import generic, numbers, strings
11
+ from ._attribute_value import attribute_value
12
+
13
+ __all__ = [
14
+ "generic",
15
+ "strings",
16
+ "numbers",
17
+ "attribute_value",
18
+ ]
@@ -0,0 +1,90 @@
1
+ from collections.abc import Callable, Mapping, Sequence
2
+ from dataclasses import dataclass
3
+ from typing import Any, override
4
+
5
+ from ..helpers import get_metadata
6
+ from ..typing import (
7
+ AdaptPairFunc,
8
+ JsonDict,
9
+ SupportsMetadata,
10
+ )
11
+
12
+ __all__ = ["attribute_value"]
13
+
14
+
15
def default_value_getter(obj: Any, key: Any) -> Any:
    """Read the value stored under ``key`` on ``obj``.

    Objects exposing ``__getitem__`` are indexed with ``key``; every other
    object is queried via ``getattr``.

    Args:
        obj: Object to read from.
        key: Item key or attribute name to look up.

    Returns:
        The value found under ``key``.
    """
    supports_indexing = hasattr(obj, "__getitem__")

    return obj[key] if supports_indexing else getattr(obj, key)
20
+
21
+
22
def default_value_setter(obj: Any, key: Any, value: Any) -> None:
    """Store ``value`` under ``key`` on ``obj``.

    Objects exposing ``__setitem__`` receive an item assignment; every other
    object receives an attribute assignment via ``setattr``.

    Args:
        obj: Object to write to.
        key: Item key or attribute name to assign.
        value: Value to store.
    """
    if not hasattr(obj, "__setitem__"):
        setattr(obj, key, value)
        return

    obj[key] = value
27
+
28
+
29
+ @dataclass(slots=True, frozen=True)
30
+ class attribute_value[V](AdaptPairFunc[V], SupportsMetadata):
31
+ """Adapt values of attributes using specified adaptation functions.
32
+
33
+ This class allows for the adaptation of multiple attributes of a case by applying
34
+ one or more adaptation functions to each attribute. It supports different data structures
35
+ like mappings (dictionaries) and dataframes
36
+
37
+ Args:
38
+ attributes: A mapping of attribute names to either single adaptation functions or
39
+ sequences of adaptation functions that will be applied in order.
40
+ value_getter: Function to retrieve values from objects. Defaults to dictionary/attribute access.
41
+ value_setter: Function to set values on objects. Defaults to dictionary/attribute assignment.
42
+
43
+ Returns:
44
+ A new case with adapted attribute values.
45
+
46
+ Examples:
47
+ >>> func = attribute_value({
48
+ ... "name": lambda x, y: x if x == y else y,
49
+ ... "age": lambda x, y: x if x > y else y,
50
+ ... })
51
+ >>> result = func(
52
+ ... {"name": "Alice", "age": 30},
53
+ ... {"name": "Peter", "age": 25}
54
+ ... )
55
+ >>> result
56
+ {'name': 'Peter', 'age': 30}
57
+ """
58
+
59
+ attributes: Mapping[str, AdaptPairFunc[Any] | Sequence[AdaptPairFunc[Any]]]
60
+ value_getter: Callable[[Any, str], Any] = default_value_getter
61
+ value_setter: Callable[[Any, str, Any], None] = default_value_setter
62
+
63
+ @property
64
+ @override
65
+ def metadata(self) -> JsonDict:
66
+ return {
67
+ "attributes": {
68
+ key: get_metadata(value) for key, value in self.attributes.items()
69
+ },
70
+ "value_getter": get_metadata(self.value_getter),
71
+ "value_setter": get_metadata(self.value_setter),
72
+ }
73
+
74
+ @override
75
+ def __call__(self, case: V, query: V) -> V:
76
+ for attr_name in self.attributes:
77
+ adapt_funcs = self.attributes[attr_name]
78
+
79
+ if not isinstance(adapt_funcs, Sequence):
80
+ adapt_funcs = [adapt_funcs]
81
+
82
+ case_attr_value = self.value_getter(case, attr_name)
83
+ query_attr_value = self.value_getter(query, attr_name)
84
+
85
+ for adapt_func in adapt_funcs:
86
+ case_attr_value = adapt_func(case_attr_value, query_attr_value)
87
+
88
+ self.value_setter(case, attr_name, case_attr_value)
89
+
90
+ return case
@@ -0,0 +1,86 @@
1
+ from copy import deepcopy
2
+ from dataclasses import dataclass
3
+ from typing import Literal, override
4
+
5
+ from ..helpers import get_metadata
6
+ from ..typing import AdaptPairFunc, JsonDict, SupportsMetadata
7
+
8
+ __all__ = [
9
+ "pipe",
10
+ "null",
11
+ ]
12
+
13
+
14
+ @dataclass(slots=True, frozen=True)
15
+ class pipe[V](AdaptPairFunc[V], SupportsMetadata):
16
+ """Chain multiple adaptation functions together.
17
+
18
+ Args:
19
+ functions: List of adaptation functions to apply in order.
20
+
21
+ Returns:
22
+ The adapted value.
23
+
24
+ Examples:
25
+ >>> func = pipe([
26
+ ... lambda x, y: x + y,
27
+ ... lambda x, y: x * y,
28
+ ... ])
29
+ >>> func(2, 3)
30
+ 15
31
+ """
32
+
33
+ functions: list[AdaptPairFunc[V]]
34
+
35
+ @property
36
+ @override
37
+ def metadata(self) -> JsonDict:
38
+ return {
39
+ "functions": [get_metadata(func) for func in self.functions],
40
+ }
41
+
42
+ @override
43
+ def __call__(self, case: V, query: V) -> V:
44
+ current_case = case
45
+
46
+ for func in self.functions:
47
+ current_case = func(current_case, query)
48
+
49
+ return current_case
50
+
51
+
52
+ @dataclass(slots=True, frozen=True)
53
+ class null[V](AdaptPairFunc[V], SupportsMetadata):
54
+ """Perform a null adaptation and return the original case or query value.
55
+
56
+ Args:
57
+ select: Either "case" or "query".
58
+ copy: Whether to copy the value before returning it.
59
+
60
+ Returns:
61
+ The original case value.
62
+
63
+ Examples:
64
+ >>> func = null()
65
+ >>> func(2, 3)
66
+ 2
67
+ """
68
+
69
+ target: Literal["case", "query"] = "case"
70
+ copy: bool = False
71
+
72
+ @override
73
+ def __call__(self, case: V, query: V) -> V:
74
+ value: V
75
+
76
+ if self.target == "case":
77
+ value = case
78
+ elif self.target == "query":
79
+ value = query
80
+ else:
81
+ raise ValueError(f"Invalid target value: {self.target}")
82
+
83
+ if self.copy:
84
+ value = deepcopy(value)
85
+
86
+ return value