cbrkit 0.7.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cbrkit
3
- Version: 0.7.0
3
+ Version: 0.8.0
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
5
5
  Home-page: https://wi2trier.github.io/cbrkit/
6
6
  License: MIT
@@ -28,9 +28,12 @@ Provides-Extra: all
28
28
  Provides-Extra: api
29
29
  Provides-Extra: cli
30
30
  Provides-Extra: nlp
31
+ Provides-Extra: timeseries
31
32
  Provides-Extra: transformers
33
+ Requires-Dist: dtaidistance (>=2.3.11,<3.0.0) ; extra == "all" or extra == "timeseries"
32
34
  Requires-Dist: fastapi[all] (>=0.100,<1.0) ; extra == "all" or extra == "api"
33
35
  Requires-Dist: levenshtein (>=0.23,<1.0) ; extra == "all" or extra == "nlp"
36
+ Requires-Dist: minineedle (>=3.1.5,<4.0.0) ; extra == "all" or extra == "timeseries"
34
37
  Requires-Dist: nltk (>=3.8,<4.0) ; extra == "all" or extra == "nlp"
35
38
  Requires-Dist: openai (>=1.5,<2.0) ; extra == "all" or extra == "nlp"
36
39
  Requires-Dist: orjson (>=3.9,<4.0)
@@ -38,7 +41,7 @@ Requires-Dist: pandas (>=2.1,<3.0)
38
41
  Requires-Dist: pyarrow (>=13.0)
39
42
  Requires-Dist: pyyaml (>=6.0,<7.0)
40
43
  Requires-Dist: sentence-transformers (>=2.2,<3.0) ; extra == "all" or extra == "transformers"
41
- Requires-Dist: spacy (>=3.7,<4.0) ; extra == "all" or extra == "all" or extra == "nlp"
44
+ Requires-Dist: spacy (>=3.7,<4.0) ; extra == "all" or extra == "nlp"
42
45
  Requires-Dist: torch (>=2.1.1,<3.0.0) ; extra == "all" or extra == "transformers"
43
46
  Requires-Dist: transformers (>=4.35,<5.0) ; extra == "all" or extra == "transformers"
44
47
  Requires-Dist: typer[all] (>=0.9,<1.0) ; extra == "all" or extra == "cli"
@@ -5,7 +5,6 @@
5
5
 
6
6
  """
7
7
 
8
-
9
8
  from . import helpers, loaders, retrieval, sim, typing
10
9
 
11
10
  __all__ = [
@@ -2,7 +2,6 @@
2
2
  .. include:: ../cli.md
3
3
  """
4
4
 
5
-
6
5
  from pathlib import Path
7
6
 
8
7
  try:
@@ -0,0 +1,91 @@
1
+ from collections.abc import Collection, Sequence, Set
2
+ from typing import Any
3
+
4
+ from cbrkit.helpers import dist2sim
5
+ from cbrkit.typing import SimPairFunc
6
+
7
+ Number = float | int
8
+
9
+ __all__ = ["jaccard", "smith_waterman", "dtw"]
10
+
11
+
12
def jaccard() -> SimPairFunc[Collection[Any], float]:
    """Jaccard similarity function.

    The returned function turns both inputs into sets, computes their
    Jaccard distance with ``nltk.metrics.jaccard_distance`` and passes that
    distance through ``dist2sim`` to obtain a similarity.

    Returns:
        A function that takes two collections and returns their similarity.

    Examples:
        >>> sim = jaccard()
        >>> sim(["a", "b", "c", "d"], ["a", "b", "c"])
        0.8
    """
    from nltk.metrics import jaccard_distance

    def wrapped_func(x: Collection[Any], y: Collection[Any]) -> float:
        # nltk calls the ``union``/``intersection`` *methods* on its inputs.
        # Only the builtin set types are guaranteed to provide them; other
        # ``collections.abc.Set`` implementations (e.g. ``dict.keys()``) only
        # support the set operators, so they must be converted as well.
        if not isinstance(x, (set, frozenset)):
            x = set(x)
        if not isinstance(y, (set, frozenset)):
            y = set(y)

        return dist2sim(jaccard_distance(x, y))

    return wrapped_func
31
+
32
+
33
def smith_waterman(
    match_score: int = 2, mismatch_penalty: int = -1, gap_penalty: int = -1
) -> SimPairFunc[Sequence[Any], float]:
    """Smith-Waterman alignment score with configurable scoring parameters.

    Args:
        match_score: Score for matching elements. Defaults to 2.
        mismatch_penalty: Penalty for mismatching elements. Defaults to -1.
        gap_penalty: Penalty for gaps. Defaults to -1.

    Returns:
        A function that aligns two sequences with ``minineedle`` and returns
        the alignment score as a float. If no element matches it returns 0.0.

    Examples:
        >>> sim = smith_waterman()
        >>> sim("abcde", "fghe")
        2.0
    """
    from minineedle import core, smith

    def wrapped_func(x: Sequence[Any], y: Sequence[Any]) -> float:
        try:
            alignment = smith.SmithWaterman(x, y)
            alignment.change_matrix(
                core.ScoreMatrix(
                    match=match_score, miss=mismatch_penalty, gap=gap_penalty
                )
            )
            alignment.align()
            score = alignment.get_score()
        except ZeroDivisionError:
            # Presumably raised by minineedle when nothing aligns; this is the
            # documented "no element matches" case.
            return 0.0

        # The raw score is an int; convert it so both return paths honour the
        # declared ``float`` return type.
        return float(score)

    return wrapped_func
66
+
67
+
68
def dtw() -> SimPairFunc[Collection[Number], float]:
    """Dynamic Time Warping similarity function.

    The returned function converts both inputs to NumPy arrays (unless they
    already are arrays), computes their DTW distance with
    ``dtaidistance.dtw.distance`` and passes that distance through
    ``dist2sim`` to obtain a similarity.

    Returns:
        A function that takes two numeric series (collections of ints/floats
        or NumPy arrays) and returns their similarity.

    Examples:
        >>> sim = dtw()
        >>> sim([1, 2, 3], [1, 2, 3, 4])
        0.5
    """
    import numpy as np

    # Aliased so the import does not shadow this function's own name.
    from dtaidistance import dtw as dtaidistance_dtw

    def wrapped_func(
        x: Collection[Number] | np.ndarray, y: Collection[Number] | np.ndarray
    ) -> float:
        if not isinstance(x, np.ndarray):
            x = np.array(x)
        if not isinstance(y, np.ndarray):
            y = np.array(y)

        distance = dtaidistance_dtw.distance(x, y)

        return dist2sim(distance)

    return wrapped_func
@@ -32,20 +32,17 @@ SimSeqOrMap = SimMap[KeyType, SimType] | SimSeq[SimType]
32
32
  class SimMapFunc(Protocol[KeyType, ValueType_contra, SimType_cov]):
33
33
  def __call__(
34
34
  self, x_map: Mapping[KeyType, ValueType_contra], y: ValueType_contra
35
- ) -> SimMap[KeyType, SimType_cov]:
36
- ...
35
+ ) -> SimMap[KeyType, SimType_cov]: ...
37
36
 
38
37
 
39
38
  class SimSeqFunc(Protocol[ValueType_contra, SimType_cov]):
40
39
  def __call__(
41
40
  self, pairs: Sequence[tuple[ValueType_contra, ValueType_contra]], /
42
- ) -> SimSeq[SimType_cov]:
43
- ...
41
+ ) -> SimSeq[SimType_cov]: ...
44
42
 
45
43
 
46
44
  class SimPairFunc(Protocol[ValueType_contra, SimType_cov]):
47
- def __call__(self, x: ValueType_contra, y: ValueType_contra, /) -> SimType_cov:
48
- ...
45
+ def __call__(self, x: ValueType_contra, y: ValueType_contra, /) -> SimType_cov: ...
49
46
 
50
47
 
51
48
  AnySimFunc = (
@@ -60,8 +57,7 @@ class AggregatorFunc(Protocol[KeyType, SimType_contra]):
60
57
  self,
61
58
  similarities: SimSeqOrMap[KeyType, SimType_contra],
62
59
  /,
63
- ) -> float:
64
- ...
60
+ ) -> float: ...
65
61
 
66
62
 
67
63
  class PoolingFunc(Protocol):
@@ -69,5 +65,4 @@ class PoolingFunc(Protocol):
69
65
  self,
70
66
  similarities: SimSeq[float],
71
67
  /,
72
- ) -> float:
73
- ...
68
+ ) -> float: ...
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cbrkit"
3
- version = "0.7.0"
3
+ version = "0.8.0"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
5
5
  authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
6
6
  license = "MIT"
@@ -54,11 +54,14 @@ torch = { version = "^2.1.1", optional = true }
54
54
  transformers = { version = "^4.35", optional = true }
55
55
  typer = { version = ">=0.9, <1.0", extras = ["all"], optional = true }
56
56
  uvicorn = { version = ">=0.24, <1.0", optional = true, extras = ["standard"] }
57
+ dtaidistance = { version = "^2.3.11", optional = true }
58
+ minineedle ={ version = "^3.1.5", optional = true }
57
59
  xmltodict = ">=0.13, <1.0"
58
60
 
59
61
  [tool.poetry.group.dev.dependencies]
60
62
  pytest = "^8.0.0"
61
63
  pytest-cov = "^5.0.0"
64
+ ruff = "^0.3.4"
62
65
 
63
66
  [tool.poetry.group.docs.dependencies]
64
67
  pdoc = "^14.4"
@@ -71,16 +74,18 @@ all = [
71
74
  "openai",
72
75
  "sentence-transformers",
73
76
  "spacy",
74
- "spacy",
75
77
  "torch",
76
78
  "transformers",
77
79
  "typer",
78
80
  "uvicorn",
81
+ "minineedle",
82
+ "dtaidistance",
79
83
  ]
80
84
  cli = ["typer"]
81
85
  api = ["fastapi", "uvicorn"]
82
86
  nlp = ["levenshtein", "nltk", "openai", "spacy"]
83
87
  transformers = ["sentence-transformers", "torch", "transformers"]
88
+ timeseries = ["minineedle", "dtaidistance"]
84
89
 
85
90
  [tool.pytest.ini_options]
86
91
  addopts = "--cov cbrkit --cov-report term-missing --doctest-modules --ignore cbrkit/cli.py --ignore cbrkit/api.py --ignore result"
@@ -1,28 +0,0 @@
1
- from collections.abc import Collection, Set
2
- from typing import Any
3
-
4
- from cbrkit.helpers import dist2sim
5
- from cbrkit.typing import SimPairFunc
6
-
7
- __all__ = ["jaccard"]
8
-
9
-
10
- def jaccard() -> SimPairFunc[Collection[Any], float]:
11
- """Jaccard similarity function.
12
-
13
- Examples:
14
- >>> sim = jaccard()
15
- >>> sim(["a", "b", "c", "d"], ["a", "b", "c"])
16
- 0.8
17
- """
18
- from nltk.metrics import jaccard_distance
19
-
20
- def wrapped_func(x: Collection[Any], y: Collection[Any]) -> float:
21
- if not isinstance(x, Set):
22
- x = set(x)
23
- if not isinstance(y, Set):
24
- y = set(y)
25
-
26
- return dist2sim(jaccard_distance(x, y))
27
-
28
- return wrapped_func
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes