cbrkit 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {cbrkit-0.3.0 → cbrkit-0.3.2}/PKG-INFO +66 -18
  2. cbrkit-0.3.2/README.md +104 -0
  3. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/__init__.py +5 -3
  4. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/global_sim/_aggregate.py +15 -1
  5. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/global_sim/_attribute_value.py +14 -1
  6. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/global_sim/graph/_astar.py +13 -1
  7. cbrkit-0.3.0/cbrkit/sim/_helpers.py → cbrkit-0.3.2/cbrkit/helpers.py +56 -1
  8. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/loaders.py +136 -0
  9. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/retrieval.py +66 -1
  10. cbrkit-0.3.2/cbrkit/sim/__init__.py +9 -0
  11. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/sim/collections.py +10 -1
  12. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/sim/generic.py +27 -0
  13. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/sim/numeric.py +20 -1
  14. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/sim/strings.py +83 -1
  15. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/sim/taxonomy.py +18 -0
  16. {cbrkit-0.3.0 → cbrkit-0.3.2}/pyproject.toml +22 -28
  17. cbrkit-0.3.0/README.md +0 -57
  18. cbrkit-0.3.0/cbrkit/sim/__init__.py +0 -16
  19. {cbrkit-0.3.0 → cbrkit-0.3.2}/LICENSE +0 -0
  20. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/__main__.py +0 -0
  21. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/api.py +0 -0
  22. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/cli.py +0 -0
  23. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/global_sim/__init__.py +0 -0
  24. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/global_sim/graph/__init__.py +0 -0
  25. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/global_sim/graph/_model.py +0 -0
  26. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/py.typed +0 -0
  27. {cbrkit-0.3.0 → cbrkit-0.3.2}/cbrkit/typing.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cbrkit
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
5
5
  Home-page: https://wi2trier.github.io/cbrkit/
6
6
  License: MIT
@@ -28,20 +28,21 @@ Provides-Extra: all
28
28
  Provides-Extra: api
29
29
  Provides-Extra: cli
30
30
  Provides-Extra: nlp
31
- Requires-Dist: fastapi[all] (>=0.104,<0.105) ; extra == "all" or extra == "api"
32
- Requires-Dist: levenshtein (>=0.23,<0.24) ; extra == "all" or extra == "nlp"
31
+ Provides-Extra: transformers
32
+ Requires-Dist: fastapi[all] (>=0.100,<1.0) ; extra == "all" or extra == "api"
33
+ Requires-Dist: levenshtein (>=0.23,<1.0) ; extra == "all" or extra == "nlp"
33
34
  Requires-Dist: nltk (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
34
- Requires-Dist: openai (>=1.3,<2.0) ; extra == "all" or extra == "nlp"
35
+ Requires-Dist: openai (>=1.5,<2.0) ; extra == "all" or extra == "nlp"
35
36
  Requires-Dist: orjson (>=3.9,<4.0)
36
37
  Requires-Dist: pandas (>=2.1,<3.0)
37
- Requires-Dist: pyarrow (>=14.0,<15.0)
38
+ Requires-Dist: pyarrow (>=13.0)
38
39
  Requires-Dist: pyyaml (>=6.0,<7.0)
39
- Requires-Dist: sentence-transformers (>=2.2,<3.0) ; extra == "all" or extra == "nlp"
40
- Requires-Dist: spacy (>=3.7,<4.0) ; extra == "all" or extra == "nlp"
41
- Requires-Dist: torch (>=2.1.1,<3.0.0) ; extra == "all" or extra == "nlp"
42
- Requires-Dist: transformers (>=4.36,<5.0) ; extra == "all" or extra == "nlp"
40
+ Requires-Dist: sentence-transformers (>=2.2,<3.0) ; extra == "all" or extra == "transformers"
41
+ Requires-Dist: spacy (>=3.7,<4.0) ; extra == "all" or extra == "all" or extra == "nlp"
42
+ Requires-Dist: torch (>=2.1.1,<3.0.0) ; extra == "all" or extra == "transformers"
43
+ Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
43
44
  Requires-Dist: typer[all] (>=0.9,<0.10) ; extra == "all" or extra == "cli"
44
- Requires-Dist: uvicorn[standard] (>=0.24,<0.25) ; extra == "all" or extra == "api"
45
+ Requires-Dist: uvicorn[standard] (>=0.24,<1.0) ; extra == "all" or extra == "api"
45
46
  Requires-Dist: xmltodict (>=0.13,<0.14)
46
47
  Project-URL: Repository, https://github.com/wi2trier/cbrkit
47
48
  Description-Content-Type: text/markdown
@@ -64,11 +65,6 @@ Description-Content-Type: text/markdown
64
65
 
65
66
  # CBRkit
66
67
 
67
- > [!caution]
68
- > The project is under active development and does not yet adhere to semantic versioning.
69
- > Breaking changes may occur at any time for versions `0.x.y`.
70
- > Once the project reaches version `1.0`, semantic versioning will be applied.
71
-
72
68
  ## Installation
73
69
 
74
70
  The library is available on [PyPI](https://pypi.org/project/cbrkit/), so you can install it with `pip`:
@@ -85,7 +81,8 @@ pip install cbrkit[EXTRA_NAME,...]
85
81
 
86
82
  where `EXTRA_NAME` is one of the following:
87
83
 
88
- - `nlp`: Natural Language Processing (NLP), including `spacy`, `openai`, and `sentence-transformers`
84
+ - `nlp`: Standalone NLP tools `levenshtein`, `nltk`, `openai`, and `spacy`
85
+ - `transformers`: NLP tools based on `pytorch` and `transformers`
89
86
  - `cli`: Command Line Interface (CLI)
90
87
  - `api`: REST API Server
91
88
  - `all`: All of the above
@@ -95,12 +92,63 @@ where `EXTRA_NAME` is one of the following:
95
92
  CBRkit allows the definition of similarity metrics through _composition_.
96
93
  This means that you can easily build even complex similarities by mixing built-in and/or custom measures.
97
94
  CBRkit also includes predefined aggregation functions.
98
- A working retrieval example can be found as part of our [testing suite](https://github.com/wi2trier/cbrkit/tree/main/tests/test_retrieve.py).
99
-
95
+ To get started, we provide a [demo project](https://github.com/wi2trier/cbrkit-demo) that shows how to use the library in a real-world scenario.
100
96
  The following modules are part of CBRkit:
101
97
 
98
+ - `loaders`: Functions for loading cases and queries.
102
99
  - `sim`: Similarity generator functions for various data types (e.g., strings, numbers).
103
100
  - `global_sim`: Similarity generator functions for aggregating the above ones.
104
101
  - `retrieval`: Functions for retrieving cases based on a query.
105
102
  - `typing`: Generic type definitions for defining custom functions.
106
103
 
104
+ CBRkit is fully typed, so IDEs like VSCode and PyCharm can provide autocompletion and type checking.
105
+ We will explain all modules and their basic usage in the following sections.
106
+
107
+ ### Loading Cases and Queries
108
+
109
+ The first step is to load cases and queries.
110
+ We provide predefined functions for the most common formats like CSV, JSON, and XML.
111
+ Additionally, `cbrkit` also integrates with `pandas` for loading data frames.
112
+ The following example shows how to load cases and queries from a CSV file using `pandas`:
113
+
114
+ ```python
115
+ import pandas as pd
116
+ import cbrkit
117
+
118
+ df = pd.read_csv("path/to/cases.csv")
119
+ cases = cbrkit.loaders.dataframe(df)
120
+ ```
121
+
122
+ When dealing with formats like JSON, the files can be loaded directly:
123
+
124
+ ```python
125
+ cases = cbrkit.loaders.json("path/to/cases.json")
126
+ ```
127
+
128
+ Queries can be loaded using the same loader functions.
129
+ CBRkit expects the type of the queries to match the type of the cases.
130
+
131
+ ```python
132
+ # for pandas
133
+ queries = cbrkit.loaders.dataframe(pd.read_csv("path/to/queries.csv"))
134
+ # for json
135
+ queries = cbrkit.loaders.json("path/to/queries.json")
136
+ ```
137
+
138
+ In case your query collection only contains a single query, you can use the `singleton` function to extract it.
139
+
140
+ ```python
141
+ query = cbrkit.singleton(queries)
142
+ ```
143
+
144
+ Alternatively, you can also create a query directly in Python:
145
+
146
+ ```python
147
+ # for pandas
148
+ query = pd.Series({"name": "John", "age": 25})
149
+ # for json
150
+ query = {"name": "John", "age": 25}
151
+ ```
152
+
153
+ ### Similarity Measures and Aggregation
154
+
cbrkit-0.3.2/README.md ADDED
@@ -0,0 +1,104 @@
1
+ <!-- markdownlint-disable MD033 MD041 -->
2
+ <h2><p align="center">CBRkit</p></h2>
3
+ <p align="center">
4
+ <img width="256px" alt="cbrkit logo" src="https://raw.githubusercontent.com/wi2trier/cbrkit/main/assets/logo.png" />
5
+ </p>
6
+ <p align="center">
7
+ <a href="https://pypi.org/project/cbrkit/">PyPI</a> |
8
+ <a href="https://wi2trier.github.io/cbrkit/">Docs</a> |
9
+ <a href="https://github.com/wi2trier/cbrkit/tree/main/tests/test_retrieve.py">Example</a>
10
+ </p>
11
+ <p align="center">
12
+ Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
13
+ </p>
14
+
15
+ ---
16
+
17
+ # CBRkit
18
+
19
+ ## Installation
20
+
21
+ The library is available on [PyPI](https://pypi.org/project/cbrkit/), so you can install it with `pip`:
22
+
23
+ ```shell
24
+ pip install cbrkit
25
+ ```
26
+
27
+ It comes with several optional dependencies for certain tasks like NLP which can be installed with:
28
+
29
+ ```shell
30
+ pip install cbrkit[EXTRA_NAME,...]
31
+ ```
32
+
33
+ where `EXTRA_NAME` is one of the following:
34
+
35
+ - `nlp`: Standalone NLP tools `levenshtein`, `nltk`, `openai`, and `spacy`
36
+ - `transformers`: NLP tools based on `pytorch` and `transformers`
37
+ - `cli`: Command Line Interface (CLI)
38
+ - `api`: REST API Server
39
+ - `all`: All of the above
40
+
41
+ ## Usage
42
+
43
+ CBRkit allows the definition of similarity metrics through _composition_.
44
+ This means that you can easily build even complex similarities by mixing built-in and/or custom measures.
45
+ CBRkit also includes predefined aggregation functions.
46
+ To get started, we provide a [demo project](https://github.com/wi2trier/cbrkit-demo) that shows how to use the library in a real-world scenario.
47
+ The following modules are part of CBRkit:
48
+
49
+ - `loaders`: Functions for loading cases and queries.
50
+ - `sim`: Similarity generator functions for various data types (e.g., strings, numbers).
51
+ - `global_sim`: Similarity generator functions for aggregating the above ones.
52
+ - `retrieval`: Functions for retrieving cases based on a query.
53
+ - `typing`: Generic type definitions for defining custom functions.
54
+
55
+ CBRkit is fully typed, so IDEs like VSCode and PyCharm can provide autocompletion and type checking.
56
+ We will explain all modules and their basic usage in the following sections.
57
+
58
+ ### Loading Cases and Queries
59
+
60
+ The first step is to load cases and queries.
61
+ We provide predefined functions for the most common formats like CSV, JSON, and XML.
62
+ Additionally, `cbrkit` also integrates with `pandas` for loading data frames.
63
+ The following example shows how to load cases and queries from a CSV file using `pandas`:
64
+
65
+ ```python
66
+ import pandas as pd
67
+ import cbrkit
68
+
69
+ df = pd.read_csv("path/to/cases.csv")
70
+ cases = cbrkit.loaders.dataframe(df)
71
+ ```
72
+
73
+ When dealing with formats like JSON, the files can be loaded directly:
74
+
75
+ ```python
76
+ cases = cbrkit.loaders.json("path/to/cases.json")
77
+ ```
78
+
79
+ Queries can be loaded using the same loader functions.
80
+ CBRkit expects the type of the queries to match the type of the cases.
81
+
82
+ ```python
83
+ # for pandas
84
+ queries = cbrkit.loaders.dataframe(pd.read_csv("path/to/queries.csv"))
85
+ # for json
86
+ queries = cbrkit.loaders.json("path/to/queries.json")
87
+ ```
88
+
89
+ In case your query collection only contains a single query, you can use the `singleton` function to extract it.
90
+
91
+ ```python
92
+ query = cbrkit.singleton(queries)
93
+ ```
94
+
95
+ Alternatively, you can also create a query directly in Python:
96
+
97
+ ```python
98
+ # for pandas
99
+ query = pd.Series({"name": "John", "age": 25})
100
+ # for json
101
+ query = {"name": "John", "age": 25}
102
+ ```
103
+
104
+ ### Similarity Measures and Aggregation
@@ -5,12 +5,14 @@
5
5
 
6
6
  """
7
7
 
8
- from . import global_sim, loaders, retrieval, sim, typing
9
8
 
10
- __all__ = (
9
+ from . import global_sim, helpers, loaders, retrieval, sim, typing
10
+
11
+ __all__ = [
11
12
  "loaders",
12
13
  "sim",
13
14
  "global_sim",
14
15
  "typing",
15
16
  "retrieval",
16
- )
17
+ "helpers",
18
+ ]
@@ -2,7 +2,7 @@ import statistics
2
2
  from collections.abc import Mapping, Sequence
3
3
  from typing import Literal
4
4
 
5
- from cbrkit.sim._helpers import unpack_sim
5
+ from cbrkit.helpers import unpack_sim
6
6
  from cbrkit.typing import (
7
7
  AggregatorFunc,
8
8
  AnyFloat,
@@ -51,6 +51,20 @@ def aggregator(
51
51
  pooling_weights: SimSeqOrMap[KeyType, float] | None = None,
52
52
  default_pooling_weight: float = 1.0,
53
53
  ) -> AggregatorFunc[KeyType, AnyFloat]:
54
+ """
55
+ Aggregates local similarities to a global similarity using the specified pooling function.
56
+
57
+ Args:
58
+ pooling: The pooling function to use. It can be either a string representing the name of the pooling function or a custom pooling function (see `cbrkit.typing.PoolingFunc`).
59
+ pooling_weights: The weights to apply to the similarities during pooling. It can be a sequence or a mapping. If None, every local similarity is weighted equally.
60
+ default_pooling_weight: The default weight to use if a similarity key is not found in the pooling_weights mapping.
61
+
62
+ Examples:
63
+ >>> global_sim = aggregator("mean")
64
+ >>> global_sim([0.5, 0.75, 1.0])
65
+ 0.75
66
+ """
67
+
54
68
  pooling_func = _pooling_funcs[pooling] if isinstance(pooling, str) else pooling
55
69
 
56
70
  def wrapped_func(similarities: SimSeqOrMap[KeyType, AnyFloat]) -> float:
@@ -5,7 +5,7 @@ from typing import Any, Generic
5
5
 
6
6
  import pandas as pd
7
7
 
8
- from cbrkit.sim import sim2map
8
+ from cbrkit.helpers import sim2map
9
9
  from cbrkit.typing import (
10
10
  AggregatorFunc,
11
11
  AnySimFunc,
@@ -60,6 +60,19 @@ def attribute_value(
60
60
  value_getter: Callable[[Any, str], Any] = _value_getter,
61
61
  key_getter: Callable[[Any], Iterator[str]] = _key_getter,
62
62
  ) -> SimMapFunc[Any, AttributeValueData, AttributeValueSim[SimType]]:
63
+ """
64
+ Similarity function that computes the attribute value similarity between two cases.
65
+
66
+ Args:
67
+ attributes: A mapping of attribute names to the similarity functions to be used for those attributes. Takes precedence over types.
68
+ types: A mapping of attribute types to the similarity functions to be used for those types.
69
+ types_fallback: A similarity function to be used as a fallback when no specific similarity function
70
+ is defined for an attribute type.
71
+ aggregator: A function that aggregates the local similarity scores for each attribute into a single global similarity.
72
+ value_getter: A function that retrieves the value of an attribute from a case.
73
+ key_getter: A function that retrieves the attribute names from a target case.
74
+ """
75
+
63
76
  attributes_map: Mapping[str, AnySimFunc[KeyType, Any, SimType]] = (
64
77
  {} if attributes is None else attributes
65
78
  )
@@ -16,7 +16,7 @@ from cbrkit.global_sim.graph._model import (
16
16
  NodeData,
17
17
  NodeKey,
18
18
  )
19
- from cbrkit.sim._helpers import unpack_sims
19
+ from cbrkit.helpers import unpack_sims
20
20
  from cbrkit.typing import Casebase, FloatProtocol, KeyType, SimPairFunc, SimType
21
21
 
22
22
  logger = logging.getLogger(__name__)
@@ -149,6 +149,18 @@ def astar(
149
149
  edge_sim_func: SimPairFunc[EdgeData, SimType],
150
150
  queue_limit: int,
151
151
  ) -> dict[KeyType, GraphSim[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]]:
152
+ """
153
+ Performs the A* algorithm proposed by [Bergmann and Gil (2014)](https://doi.org/10.1016/j.is.2012.07.005) to compute the similarity between a query graph and the graphs in the casebase.
154
+
155
+ Args:
156
+ x_map: A casebase of graphs
157
+ y: Query graph
158
+ node_sim_func: A similarity function for graph nodes
159
+ edge_sim_func: A similarity function for graph edges
160
+ queue_limit: Limits the queue size which prunes the search space. This leads to a faster search and less memory usage but also introduces a similarity error.
161
+
162
+ """
163
+
152
164
  results = {
153
165
  key: _astar_single(
154
166
  x,
@@ -1,5 +1,5 @@
1
1
  from abc import ABC
2
- from collections.abc import Iterable, Mapping, Sequence
2
+ from collections.abc import Collection, Iterable, Mapping, Sequence
3
3
  from inspect import signature as inspect_signature
4
4
  from typing import Any, cast
5
5
 
@@ -22,16 +22,71 @@ __all__ = [
22
22
  "unpack_sim",
23
23
  "unpack_sims",
24
24
  "AbstractFloat",
25
+ "singleton",
25
26
  ]
26
27
 
27
28
 
29
+ def singleton(x: Mapping[Any, ValueType] | Collection[ValueType]) -> ValueType:
30
+ """
31
+ Return the only element of the input, or raise an error if it does not contain exactly one element.
32
+
33
+ Args:
34
+ x: The input collection or mapping.
35
+
36
+ Returns:
37
+ The only element of the input.
38
+
39
+ Examples:
40
+ >>> singleton([1])
41
+ 1
42
+ >>> singleton({1: "a"})
43
+ 'a'
44
+
45
+ Raises:
46
+ ValueError: If the input does not contain exactly one element.
47
+ TypeError: If the input is not a collection or mapping.
48
+ """
49
+ if len(x) != 1:
50
+ raise ValueError(f"Expected exactly one element, but got {len(x)}")
51
+
52
+ if isinstance(x, Mapping):
53
+ return next(iter(x.values()))
54
+ elif isinstance(x, Collection):
55
+ return next(iter(x))
56
+ else:
57
+ raise TypeError(f"Expected a Mapping or Collection, but got {type(x)}")
58
+
59
+
28
60
  def dist2sim(distance: float) -> float:
61
+ """Convert a distance to a similarity.
62
+
63
+ Args:
64
+ distance: The distance to convert
65
+
66
+ Examples:
67
+ >>> dist2sim(1.)
68
+ 0.5
69
+ """
29
70
  return 1 / (1 + distance)
30
71
 
31
72
 
32
73
  def sim2seq(
33
74
  func: SimPairFunc[ValueType, SimType] | SimSeqFunc[ValueType, SimType],
34
75
  ) -> SimSeqFunc[ValueType, SimType]:
76
+ """
77
+ Converts a similarity function that operates on pairs of values into a similarity function that operates on sequences of values.
78
+
79
+ Args:
80
+ func: The similarity function to be converted.
81
+
82
+ Examples:
83
+ >>> def sim_func(x: int, y: int) -> float:
84
+ ... return abs(x - y) / max(x, y)
85
+ ...
86
+ >>> seq_func = sim2seq(sim_func)
87
+ >>> seq_func([(1, 2), (3, 4), (5, 6)])
88
+ [0.5, 0.25, 0.16666666666666666]
89
+ """
35
90
  signature = inspect_signature(func)
36
91
 
37
92
  if len(signature.parameters) == 2:
@@ -76,10 +76,35 @@ class DataFrameCasebase(abc.Mapping):
76
76
 
77
77
 
78
78
  def dataframe(df: DataFrame) -> Casebase[Any, pd.Series]:
79
+ """Converts a pandas DataFrame into a Casebase.
80
+
81
+ Args:
82
+ df: pandas DataFrame.
83
+
84
+ Returns:
85
+ Returns a Casebase as a DataFrameCasebase.
86
+
87
+ Examples:
88
+ >>> file_path = "./data/cars-1k.csv"
89
+ >>> df = pd.read_csv(file_path)
90
+ >>> result = dataframe(df)
91
+ """
79
92
  return DataFrameCasebase(df)
80
93
 
81
94
 
82
95
  def csv(path: FilePath) -> dict[int, dict[str, str]]:
96
+ """Reads a csv file and converts it into a dict representation
97
+
98
+ Args:
99
+ path: File path of the csv file
100
+
101
+ Returns:
102
+ Dict representation of the csv file.
103
+
104
+ Examples:
105
+ >>> file_path = "./data/cars-1k.csv"
106
+ >>> result = csv(file_path)
107
+ """
83
108
  data: dict[int, dict[str, str]] = {}
84
109
 
85
110
  with open(path) as fp:
@@ -99,6 +124,18 @@ def _csv_pandas(path: FilePath) -> dict[int, pd.Series]:
99
124
 
100
125
 
101
126
  def json(path: FilePath) -> dict[Any, Any]:
127
+ """Reads a json file and converts it into a dict representation
128
+
129
+ Args:
130
+ path: File path of the json file
131
+
132
+ Returns:
133
+ Dict representation of the json file.
134
+
135
+ Examples:
136
+ >>> file_path = "data/cars-1k.json" # doctest: +SKIP
137
+ >>> json(file_path) # doctest: +SKIP
138
+ """
102
139
  with open(path, "rb") as fp:
103
140
  data = orjson.loads(fp.read())
104
141
 
@@ -111,11 +148,35 @@ def json(path: FilePath) -> dict[Any, Any]:
111
148
 
112
149
 
113
150
  def toml(path: FilePath) -> dict[str, Any]:
151
+ """Reads a toml file and parses it into a dict representation
152
+
153
+ Args:
154
+ path: File path of the toml file
155
+
156
+ Returns:
157
+ Dict representation of the toml file.
158
+
159
+ Examples:
160
+ >>> file_path = "./data/file.toml" # doctest: +SKIP
161
+ >>> toml(file_path) # doctest: +SKIP
162
+ """
114
163
  with open(path, "rb") as fp:
115
164
  return tomllib.load(fp)
116
165
 
117
166
 
118
167
  def yaml(path: FilePath) -> dict[Any, Any]:
168
+ """Reads a yaml file and parses it into a dict representation
169
+
170
+ Args:
171
+ path: File path of the yaml file
172
+
173
+ Returns:
174
+ Dict representation of the yaml file.
175
+
176
+ Examples:
177
+ >>> file_path = "./data/cars-1k.yaml"
178
+ >>> result = yaml(file_path)
179
+ """
119
180
  data: dict[Any, Any] = {}
120
181
 
121
182
  with open(path, "rb") as fp:
@@ -132,11 +193,35 @@ def yaml(path: FilePath) -> dict[Any, Any]:
132
193
 
133
194
 
134
195
  def txt(path: FilePath) -> str:
196
+ """Reads a text file and converts it into a string
197
+
198
+ Args:
199
+ path: File path of the text file
200
+
201
+ Returns:
202
+ String representation of the text file.
203
+
204
+ Examples:
205
+ >>> file_path = "data/file.txt" # doctest: +SKIP
206
+ >>> txt(file_path) # doctest: +SKIP
207
+ """
135
208
  with open(path) as fp:
136
209
  return fp.read()
137
210
 
138
211
 
139
212
  def xml(path: FilePath) -> dict[str, Any]:
213
+ """Reads an xml file and parses it into a dict representation
214
+
215
+ Args:
216
+ path: File path of the xml file
217
+
218
+ Returns:
219
+ Dict representation of the xml file.
220
+
221
+ Examples:
222
+ >>> file_path = "data/file.xml" # doctest: +SKIP
223
+ >>> result = xml(file_path) # doctest: +SKIP
224
+ """
140
225
  with open(path, "rb") as fp:
141
226
  data = xmltodict.parse(fp.read())
142
227
 
@@ -174,6 +259,18 @@ _single_loaders: dict[str, SingleLoader] = {
174
259
 
175
260
 
176
261
  def data(path: FilePath) -> dict[str, Any]:
262
+ """Reads files of types json, toml, yaml, and yml and parses them into a dict representation
263
+
264
+ Args:
265
+ path: Path of the file
266
+
267
+ Returns:
268
+ Dict representation of the file.
269
+
270
+ Examples:
271
+ >>> yaml_file = "./data/cars-1k.yaml"
272
+ >>> result = data(yaml_file)
273
+ """
177
274
  if isinstance(path, str):
178
275
  path = Path(path)
179
276
 
@@ -185,6 +282,18 @@ def data(path: FilePath) -> dict[str, Any]:
185
282
 
186
283
 
187
284
  def path(path: FilePath, pattern: str | None = None) -> Casebase[Any, Any]:
285
+ """Converts a path into a Casebase. The path can be a folder or a file.
286
+
287
+ Args:
288
+ path: Path of the file.
289
+
290
+ Returns:
291
+ Returns a Casebase.
292
+
293
+ Examples:
294
+ >>> file_path = "./data/cars-1k.csv"
295
+ >>> result = path(file_path)
296
+ """
188
297
  if isinstance(path, str):
189
298
  path = Path(path)
190
299
 
@@ -204,6 +313,19 @@ def path(path: FilePath, pattern: str | None = None) -> Casebase[Any, Any]:
204
313
 
205
314
 
206
315
  def file(path: Path) -> Casebase[Any, Any] | None:
316
+ """Converts a file into a Casebase. The file can be of type csv, json, toml, yaml, or yml.
317
+
318
+ Args:
319
+ path: Path of the file.
320
+
321
+ Returns:
322
+ Returns a Casebase.
323
+
324
+ Examples:
325
+ >>> from pathlib import Path
326
+ >>> file_path = Path("./data/cars-1k.csv")
327
+ >>> result = file(file_path)
328
+ """
207
329
  if path.suffix not in _batch_loaders:
208
330
  return None
209
331
 
@@ -214,6 +336,20 @@ def file(path: Path) -> Casebase[Any, Any] | None:
214
336
 
215
337
 
216
338
  def folder(path: Path, pattern: str) -> Casebase[Any, Any] | None:
339
+ """Converts the files of a folder into a Casebase. The files can be of type txt, csv, json, toml, yaml, or yml.
340
+
341
+ Args:
342
+ path: Path of the folder.
343
+ pattern: Relative pattern for the files.
344
+
345
+ Returns:
346
+ Returns a Casebase.
347
+
348
+ Examples:
349
+ >>> from pathlib import Path
350
+ >>> folder_path = Path("./data")
351
+ >>> result = folder(folder_path, "*.csv")
352
+ """
217
353
  cb: Casebase[Any, Any] = {}
218
354
 
219
355
  for file in path.glob(pattern):
@@ -2,8 +2,8 @@ from collections.abc import Callable, Collection, Mapping, Sequence
2
2
  from dataclasses import dataclass
3
3
  from typing import Any, Generic
4
4
 
5
+ from cbrkit.helpers import sim2map, unpack_sim
5
6
  from cbrkit.loaders import python as load_python
6
- from cbrkit.sim._helpers import sim2map, unpack_sim
7
7
  from cbrkit.typing import (
8
8
  AnySimFunc,
9
9
  Casebase,
@@ -78,6 +78,40 @@ def apply(
78
78
  retrievers: RetrieveFunc[KeyType, ValueType, SimType]
79
79
  | Sequence[RetrieveFunc[KeyType, ValueType, SimType]],
80
80
  ) -> Result[KeyType, ValueType, SimType]:
81
+ """Applies a query to a Casebase using retriever functions.
82
+
83
+ Args:
84
+ casebase: The casebase for the query.
85
+ query: The query that will be applied to the casebase
86
+ retrievers: Retriever functions that will retrieve similar cases (compared to the query) from the casebase
87
+
88
+ Returns:
89
+ Returns an object of type Result.
90
+
91
+ Examples:
92
+ >>> import cbrkit
93
+ >>> import pandas as pd
94
+ >>> df = pd.read_csv("./data/cars-1k.csv")
95
+ >>> casebase = cbrkit.loaders.dataframe(df)
96
+ >>> query = casebase[42]
97
+ >>> retriever = cbrkit.retrieval.build(
98
+ ... cbrkit.global_sim.attribute_value(
99
+ ... attributes={
100
+ ... "price": cbrkit.sim.numeric.linear(max=100000),
101
+ ... "year": cbrkit.sim.numeric.linear(max=50),
102
+ ... "manufacturer": cbrkit.sim.taxonomy.load(
103
+ ... "./data/cars-taxonomy.yaml",
104
+ ... measure=cbrkit.sim.taxonomy.wu_palmer(),
105
+ ... ),
106
+ ... "miles": cbrkit.sim.numeric.linear(max=1000000),
107
+ ... },
108
+ ... types_fallback=cbrkit.sim.generic.equality(),
109
+ ... aggregator=cbrkit.global_sim.aggregator(pooling="mean"),
110
+ ... ),
111
+ ... limit=5,
112
+ ... )
113
+ >>> result = cbrkit.retrieval.apply(casebase, query, retriever)
114
+ """
81
115
  if not isinstance(retrievers, Sequence):
82
116
  retrievers = [retrievers]
83
117
 
@@ -99,6 +133,37 @@ def build(
99
133
  similarity_func: AnySimFunc[KeyType, ValueType, SimType],
100
134
  limit: int | None = None,
101
135
  ) -> RetrieveFunc[KeyType, ValueType, SimType]:
136
+ """Based on the similarity function this function creates a retriever function.
137
+
138
+ Args:
139
+ similarity_func: Similarity function to compute the similarity between cases.
140
+ limit: Retriever function will return the top limit cases.
141
+
142
+ Returns:
143
+ Returns the retriever function.
144
+
145
+ Examples:
146
+ >>> import cbrkit
147
+ >>> retriever = cbrkit.retrieval.build(
148
+ ... cbrkit.global_sim.attribute_value(
149
+ ... attributes={
150
+ ... "price": cbrkit.sim.numeric.linear(max=100000),
151
+ ... "year": cbrkit.sim.numeric.linear(max=50),
152
+ ... "model": cbrkit.global_sim.attribute_value(
153
+ ... attributes={
154
+ ... "make": cbrkit.sim.generic.equality(),
155
+ ... "manufacturer": cbrkit.sim.taxonomy.load(
156
+ ... "./data/cars-taxonomy.yaml",
157
+ ... measure=cbrkit.sim.taxonomy.wu_palmer(),
158
+ ... ),
159
+ ... }
160
+ ... ),
161
+ ... },
162
+ ... aggregator=cbrkit.global_sim.aggregator(pooling="mean"),
163
+ ... ),
164
+ ... limit=5,
165
+ ... )
166
+ """
102
167
  sim_func = sim2map(similarity_func)
103
168
 
104
169
  def wrapped_func(
@@ -0,0 +1,9 @@
1
+ from . import collections, generic, numeric, strings, taxonomy
2
+
3
+ __all__ = [
4
+ "collections",
5
+ "generic",
6
+ "numeric",
7
+ "strings",
8
+ "taxonomy",
9
+ ]
@@ -1,11 +1,20 @@
1
1
  from collections.abc import Collection, Set
2
2
  from typing import Any
3
3
 
4
- from cbrkit.sim._helpers import dist2sim
4
+ from cbrkit.helpers import dist2sim
5
5
  from cbrkit.typing import SimPairFunc
6
6
 
7
+ __all__ = ["jaccard"]
8
+
7
9
 
8
10
  def jaccard() -> SimPairFunc[Collection[Any], float]:
11
+ """Jaccard similarity function.
12
+
13
+ Examples:
14
+ >>> sim = jaccard()
15
+ >>> sim(["a", "b", "c", "d"], ["a", "b", "c"])
16
+ 0.8
17
+ """
9
18
  from nltk.metrics import jaccard_distance
10
19
 
11
20
  def wrapped_func(x: Collection[Any], y: Collection[Any]) -> float:
@@ -7,12 +7,29 @@ from cbrkit.typing import (
7
7
  ValueType,
8
8
  )
9
9
 
10
+ __all__ = ["table", "equality"]
11
+
10
12
 
11
13
  def table(
12
14
  entries: Sequence[tuple[ValueType, ValueType, float]],
13
15
  symmetric: bool = True,
14
16
  default: float = 0.0,
15
17
  ) -> SimPairFunc[ValueType, float]:
18
+ """Allows importing similarity values from a table.
19
+
20
+ Args:
21
+ entries: Sequence[tuple[a, b, sim(a, b)]]
22
+ symmetric: If True, the table is assumed to be symmetric, i.e. sim(a, b) = sim(b, a)
23
+ default: Default similarity value for pairs not in the table
24
+
25
+ Examples:
26
+ >>> sim = table([("a", "b", 0.5), ("b", "c", 0.7)], symmetric=True, default=0.0)
27
+ >>> sim("b", "a")
28
+ 0.5
29
+ >>> sim("a", "c")
30
+ 0.0
31
+ """
32
+
16
33
  table: defaultdict[ValueType, defaultdict[ValueType, float]] = defaultdict(
17
34
  lambda: defaultdict(lambda: default)
18
35
  )
@@ -30,6 +47,16 @@ def table(
30
47
 
31
48
 
32
49
  def equality() -> SimPairFunc[Any, float]:
50
+ """Equality similarity function. Returns 1.0 if the two values are equal, 0.0 otherwise.
51
+
52
+ Examples:
53
+ >>> sim = equality()
54
+ >>> sim("b", "a")
55
+ 0.0
56
+ >>> sim("a", "a")
57
+ 1.0
58
+ """
59
+
33
60
  def wrapped_func(x: Any, y: Any) -> float:
34
61
  return 1.0 if x == y else 0.0
35
62
 
@@ -15,6 +15,9 @@ def linear(max: float, min: float = 0.0) -> SimPairFunc[Number, float]:
15
15
  min: Minimum bound of the interval
16
16
 
17
17
  ![linear](../../assets/numeric/linear.png)
18
+ >>> sim = linear(100)
19
+ >>> sim(50, 60)
20
+ 0.9
18
21
  """
19
22
 
20
23
  def wrapped_func(x: Number, y: Number) -> float:
@@ -37,6 +40,12 @@ def threshold(threshold: float) -> SimPairFunc[Number, float]:
37
40
  threshold: If the absolute difference between the two values is less than or equal to this value, the similarity is 1.0, otherwise it is 0.0
38
41
 
39
42
  ![threshold](../../assets/numeric/threshold.png)
43
+ Examples:
44
+ >>> sim = threshold(10)
45
+ >>> sim(50, 60)
46
+ 1.0
47
+ >>> sim(50, 61)
48
+ 0.0
40
49
  """
41
50
 
42
51
  def wrapped_func(x: Number, y: Number) -> float:
@@ -49,9 +58,13 @@ def exponential(alpha: float = 1.0) -> SimPairFunc[Number, float]:
49
58
  """Exponential similarity function.
50
59
 
51
60
  Args:
52
- alpha: Controls the growth of the exponential function for the similarity. The larger alpha is, the faster the function grows.
61
+ alpha: Controls the growth of the exponential function for the similarity. The larger alpha is, the faster the similarity decreases.
53
62
 
54
63
  ![exponential](../../assets/numeric/exponential.png)
64
+ Examples:
65
+ >>> sim = exponential(0.1)
66
+ >>> sim(50, 60)
67
+ 0.36787944117144233
55
68
  """
56
69
 
57
70
  def wrapped_func(x: Number, y: Number) -> float:
@@ -68,6 +81,12 @@ def sigmoid(alpha: float = 1.0, theta: float = 1.0) -> SimPairFunc[Number, float
68
81
  theta: Specifies the point at which the similarity value is 0.5.
69
82
 
70
83
  ![sigmoid](../../assets/numeric/sigmoid.png)
84
+ Examples:
85
+ >>> sim = sigmoid(1, 10)
86
+ >>> sim(50, 60)
87
+ 0.5
88
+ >>> sim(50, 58)
89
+ 0.8807970779778823
71
90
  """
72
91
 
73
92
  def wrapped_func(x: Number, y: Number) -> float:
@@ -12,8 +12,24 @@ from cbrkit.typing import (
12
12
  SimSeqFunc,
13
13
  )
14
14
 
15
+ __all__ = [
16
+ "spacy",
17
+ "sentence_transformers",
18
+ "openai",
19
+ "levenshtein",
20
+ "jaro",
21
+ "jaro_winkler",
22
+ "table",
23
+ ]
24
+
15
25
 
16
26
  def _cosine(u, v) -> float:
27
+ """Cosine similarity between two vectors
28
+
29
+ Args:
30
+ u: First vector
31
+ v: Second vector
32
+ """
17
33
  import numpy as np
18
34
  import scipy.spatial.distance as scipy_dist
19
35
 
@@ -28,6 +44,11 @@ def _unique_items(pairs: Sequence[tuple[str, str]]) -> list[str]:
28
44
 
29
45
 
30
46
  def spacy(model_name: str = "en_core_web_lg") -> SimSeqFunc[str, float]:
47
+ """[spaCy](https://spacy.io/usage/linguistic-features/#vectors-similarity) based semantic similarity using word vectors. It calculates the similarity between given text pairs.
48
+
49
+ Args:
50
+ model_name: Name of the [spaCy model](https://spacy.io/usage/models) to use to generate word vectors. Defaults to "en_core_web_lg".
51
+ """
31
52
  from spacy import load as spacy_load
32
53
 
33
54
  nlp = spacy_load(model_name)
@@ -46,6 +67,11 @@ def spacy(model_name: str = "en_core_web_lg") -> SimSeqFunc[str, float]:
46
67
 
47
68
 
48
69
  def sentence_transformers(model_name: str) -> SimSeqFunc[str, float]:
70
+ """[Sentence-Transformers](https://www.sbert.net/) based semantic similarity using word vectors. It calculates the cosine similarity between given text pairs.
71
+
72
+ Args:
73
+ model_name: Name of the [pretrained model](https://www.sbert.net/docs/pretrained_models.html) to use to generate word vectors.
74
+ """
49
75
  from sentence_transformers import SentenceTransformer
50
76
 
51
77
  model = SentenceTransformer(model_name)
@@ -61,6 +87,11 @@ def sentence_transformers(model_name: str) -> SimSeqFunc[str, float]:
61
87
 
62
88
 
63
89
  def openai(model_name: str) -> SimSeqFunc[str, float]:
90
+ """Semantic similarity using word vectors generated by one of OpenAI's embedding models. It calculates the cosine similarity between given text pairs.
91
+
92
+ Args:
93
+ model_name: Name of the [embedding model](https://platform.openai.com/docs/models/embeddings) to use to generate word vectors.
94
+ """
64
95
  import numpy as np
65
96
  from openai import Client
66
97
 
@@ -78,6 +109,18 @@ def openai(model_name: str) -> SimSeqFunc[str, float]:
78
109
 
79
110
 
80
111
  def levenshtein(score_cutoff: float | None = None) -> SimPairFunc[str, float]:
112
+ """Similarity function that calculates a normalized indel similarity between two strings based on [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance).
113
+
114
+ Args:
115
+ score_cutoff: If the similarity is less than this value, the function will return 0.0.
116
+ Examples:
117
+ >>> sim = levenshtein()
118
+ >>> sim("kitten", "sitting")
119
+ 0.6153846153846154
120
+ >>> sim = levenshtein(score_cutoff=0.8)
121
+ >>> sim("kitten", "sitting")
122
+ 0.0
123
+ """
81
124
  import Levenshtein
82
125
 
83
126
  def wrapped_func(x: str, y: str) -> float:
@@ -87,6 +130,18 @@ def levenshtein(score_cutoff: float | None = None) -> SimPairFunc[str, float]:
87
130
 
88
131
 
89
132
  def jaro(score_cutoff: float | None = None) -> SimPairFunc[str, float]:
133
+ """Jaro similarity function to compute similarity between two strings.
134
+
135
+ Args:
136
+ score_cutoff: If the similarity is less than this value, the function will return 0.0.
137
+ Examples:
138
+ >>> sim = jaro()
139
+ >>> sim("kitten", "sitting")
140
+ 0.746031746031746
141
+ >>> sim = jaro(score_cutoff=0.8)
142
+ >>> sim("kitten", "sitting")
143
+ 0.0
144
+ """
90
145
  import Levenshtein
91
146
 
92
147
  def wrapped_func(x: str, y: str) -> float:
@@ -96,8 +151,21 @@ def jaro(score_cutoff: float | None = None) -> SimPairFunc[str, float]:
96
151
 
97
152
 
98
153
  def jaro_winkler(
99
- score_cutoff: float | None = None, prefix_weight: float | None = None
154
+ score_cutoff: float | None = None, prefix_weight: float = 0.1
100
155
  ) -> SimPairFunc[str, float]:
156
+ """Jaro-Winkler similarity function to compute similarity between two strings.
157
+
158
+ Args:
159
+ score_cutoff: If the similarity is less than this value, the function will return 0.0.
160
+ prefix_weight: Weight used for the common prefix of the two strings. Has to be between 0 and 0.25. Default is 0.1.
161
+ Examples:
162
+ >>> sim = jaro_winkler()
163
+ >>> sim("kitten", "sitting")
164
+ 0.746031746031746
165
+ >>> sim = jaro_winkler(score_cutoff=0.8)
166
+ >>> sim("kitten", "sitting")
167
+ 0.0
168
+ """
101
169
  import Levenshtein
102
170
 
103
171
  def wrapped_func(x: str, y: str) -> float:
@@ -113,6 +181,20 @@ def table(
113
181
  symmetric: bool = True,
114
182
  default: float = 0.0,
115
183
  ) -> SimPairFunc[str, float]:
184
+ """Allows importing similarity values from a table.
185
+
186
+ Args:
187
+ entries: Sequence[tuple[a, b, sim(a, b)]]
188
+ symmetric: If True, the table is assumed to be symmetric, i.e. sim(a, b) = sim(b, a)
189
+ default: Default similarity value for pairs not in the table
190
+
191
+ Examples:
192
+ >>> sim = table([("a", "b", 0.5), ("b", "c", 0.7)], symmetric=True, default=0.0)
193
+ >>> sim("b", "a")
194
+ 0.5
195
+ >>> sim("a", "c")
196
+ 0.0
197
+ """
116
198
  if isinstance(entries, FilePath):
117
199
  if isinstance(entries, str):
118
200
  entries = Path(entries)
@@ -4,6 +4,8 @@ from typing import Optional, Protocol, TypedDict, cast
4
4
  from cbrkit.loaders import data as load_data
5
5
  from cbrkit.typing import FilePath, SimPairFunc
6
6
 
7
+ __all__ = ["Taxonomy", "TaxonomyNode", "TaxonomyFunc", "load", "wu_palmer"]
8
+
7
9
 
8
10
  class SerializedNode(TypedDict, total=False):
9
11
  key: str
@@ -76,6 +78,15 @@ class TaxonomyFunc(Protocol):
76
78
 
77
79
 
78
80
  def wu_palmer() -> TaxonomyFunc:
81
+ """Wu & Palmer similarity measure of two nodes in a taxonomy.
82
+ >>> taxonomy = Taxonomy("./data/cars-taxonomy.yaml")
83
+ >>> sim = wu_palmer()
84
+ >>> sim(taxonomy, "audi", "porsche")
85
+ 0.5
86
+ >>> sim(taxonomy, "audi", "bmw")
87
+ 0.0
88
+ """
89
+
79
90
  def wrapped_func(taxonomy: Taxonomy, x: str, y: str) -> float:
80
91
  node1 = taxonomy.nodes[x]
81
92
  node2 = taxonomy.nodes[y]
@@ -92,6 +103,13 @@ _taxonomy_func = wu_palmer()
92
103
  def load(
93
104
  path: FilePath, measure: TaxonomyFunc = _taxonomy_func
94
105
  ) -> SimPairFunc[str, float]:
106
+ """Load a taxonomy and return a function that measures the similarity.
107
+ >>> sim = load("./data/cars-taxonomy.yaml", measure=wu_palmer())
108
+ >>> sim("audi", "porsche")
109
+ 0.5
110
+ >>> sim("audi", "bmw")
111
+ 0.0
112
+ """
95
113
  taxonomy = Taxonomy(path)
96
114
 
97
115
  def wrapped_func(x: str, y: str) -> float:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cbrkit"
3
- version = "0.3.0"
3
+ version = "0.3.2"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
5
5
  authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
6
6
  license = "MIT"
@@ -40,53 +40,47 @@ cbrkit = "cbrkit.cli:app"
40
40
 
41
41
  [tool.poetry.dependencies]
42
42
  python = ">=3.11, <3.13"
43
+ fastapi = { version = ">=0.100, <1.0", optional = true, extras = ["all"] }
44
+ levenshtein = { version = ">=0.23, <1.0", optional = true }
45
+ nltk = { version = "^3.8", optional = true }
46
+ openai = { version = "^1.5", optional = true }
47
+ orjson = "^3.9"
43
48
  pandas = "^2.1"
49
+ pyarrow = ">=13.0"
44
50
  pyyaml = "^6.0"
45
- orjson = "^3.9"
46
- xmltodict = "^0.13"
47
- pyarrow = "^14.0"
48
- typer = { version = "^0.9", extras = ["all"], optional = true }
49
- fastapi = { version = "^0.104", optional = true, extras = ["all"] }
50
- uvicorn = { version = "^0.24", optional = true, extras = ["standard"] }
51
- spacy = { version = "^3.7", optional = true }
52
- nltk = { version = "^3.8", optional = true }
53
- levenshtein = { version = "^0.23", optional = true }
54
51
  sentence-transformers = { version = "^2.2", optional = true }
55
- openai = { version = "^1.3", optional = true }
52
+ spacy = { version = "^3.7", optional = true }
56
53
  torch = { version = "^2.1.1", optional = true }
57
- transformers = { version = "^4.36", optional = true }
54
+ transformers = { version = "^4.35", optional = true }
55
+ typer = { version = "^0.9", extras = ["all"], optional = true }
56
+ uvicorn = { version = ">=0.24, <1.0", optional = true, extras = ["standard"] }
57
+ xmltodict = "^0.13"
58
58
 
59
59
  [tool.poetry.group.dev.dependencies]
60
- pytest = "^7.4"
60
+ pytest = "^8.0.0"
61
61
  pytest-cov = "^4.1"
62
62
 
63
63
  [tool.poetry.group.docs.dependencies]
64
- pdoc = "^14.1"
64
+ pdoc = "^14.4"
65
65
 
66
66
  [tool.poetry.extras]
67
67
  all = [
68
- "typer",
69
68
  "fastapi",
70
- "uvicorn",
71
- "spacy",
72
- "nltk",
73
69
  "levenshtein",
74
- "sentence-transformers",
70
+ "nltk",
75
71
  "openai",
72
+ "sentence-transformers",
73
+ "spacy",
74
+ "spacy",
76
75
  "torch",
77
76
  "transformers",
77
+ "typer",
78
+ "uvicorn",
78
79
  ]
79
80
  cli = ["typer"]
80
81
  api = ["fastapi", "uvicorn"]
81
- nlp = [
82
- "spacy",
83
- "nltk",
84
- "levenshtein",
85
- "sentence-transformers",
86
- "openai",
87
- "torch",
88
- "transformers",
89
- ]
82
+ nlp = ["levenshtein", "nltk", "openai", "spacy"]
83
+ transformers = ["sentence-transformers", "torch", "transformers"]
90
84
 
91
85
  [tool.pytest.ini_options]
92
86
  addopts = "--cov cbrkit --cov-report term-missing --doctest-modules --ignore cbrkit/cli.py --ignore cbrkit/api.py --ignore result"
cbrkit-0.3.0/README.md DELETED
@@ -1,57 +0,0 @@
1
- <!-- markdownlint-disable MD033 MD041 -->
2
- <h2><p align="center">CBRkit</p></h2>
3
- <p align="center">
4
- <img width="256px" alt="cbrkit logo" src="https://raw.githubusercontent.com/wi2trier/cbrkit/main/assets/logo.png" />
5
- </p>
6
- <p align="center">
7
- <a href="https://pypi.org/project/cbrkit/">PyPI</a> |
8
- <a href="https://wi2trier.github.io/cbrkit/">Docs</a> |
9
- <a href="https://github.com/wi2trier/cbrkit/tree/main/tests/test_retrieve.py">Example</a>
10
- </p>
11
- <p align="center">
12
- Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
13
- </p>
14
-
15
- ---
16
-
17
- # CBRkit
18
-
19
- > [!caution]
20
- > The project is under active development and does not yet adhere to semantic versioning.
21
- > Breaking changes may occur at any time for versions `0.x.y`.
22
- > Once the project reaches version `1.0`, semantic versioning will be applied.
23
-
24
- ## Installation
25
-
26
- The library is available on [PyPI](https://pypi.org/project/cbrkit/), so you can install it with `pip`:
27
-
28
- ```shell
29
- pip install cbrkit
30
- ```
31
-
32
- It comes with several optional dependencies for certain tasks like NLP which can be installed with:
33
-
34
- ```shell
35
- pip install cbrkit[EXTRA_NAME,...]
36
- ```
37
-
38
- where `EXTRA_NAME` is one of the following:
39
-
40
- - `nlp`: Natural Language Processing (NLP), including `spacy`, `openai`, and `sentence-transformers`
41
- - `cli`: Command Line Interface (CLI)
42
- - `api`: REST API Server
43
- - `all`: All of the above
44
-
45
- ## Usage
46
-
47
- CBRkit allows the definition of similarity metrics through _composition_.
48
- This means that you can easily build even complex similarities by mixing built-in and/or custom measures.
49
- CBRkit also includes predefined aggregation functions.
50
- A working retrieval example can be found as part of our [testing suite](https://github.com/wi2trier/cbrkit/tree/main/tests/test_retrieve.py).
51
-
52
- The following modules are part of CBRkit:
53
-
54
- - `sim`: Similarity generator functions for various data types (e.g., strings, numbers).
55
- - `global_sim`: Similarity generator functions for aggregating the above ones.
56
- - `retrieval`: Functions for retrieving cases based on a query.
57
- - `typing`: Generic type definitions for defining custom functions.
@@ -1,16 +0,0 @@
1
- from . import collections, generic, numeric, strings, taxonomy
2
- from ._helpers import AbstractFloat, dist2sim, sim2map, sim2seq, unpack_sim, unpack_sims
3
-
4
- __all__ = [
5
- "collections",
6
- "generic",
7
- "numeric",
8
- "strings",
9
- "taxonomy",
10
- "dist2sim",
11
- "sim2map",
12
- "sim2seq",
13
- "unpack_sim",
14
- "unpack_sims",
15
- "AbstractFloat",
16
- ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes