cbrkit 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {cbrkit-0.2.0 → cbrkit-0.3.0}/PKG-INFO +1 -1
  2. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/global_sim/__init__.py +2 -0
  3. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/global_sim/_attribute_value.py +3 -3
  4. cbrkit-0.3.0/cbrkit/global_sim/graph/__init__.py +25 -0
  5. cbrkit-0.2.0/cbrkit/global_sim/_graph/astar.py → cbrkit-0.3.0/cbrkit/global_sim/graph/_astar.py +24 -24
  6. cbrkit-0.2.0/cbrkit/global_sim/_graph/model.py → cbrkit-0.3.0/cbrkit/global_sim/graph/_model.py +1 -1
  7. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/loaders.py +21 -6
  8. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/retrieval.py +3 -3
  9. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/taxonomy.py +3 -1
  10. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/typing.py +2 -2
  11. {cbrkit-0.2.0 → cbrkit-0.3.0}/pyproject.toml +1 -1
  12. cbrkit-0.2.0/cbrkit/global_sim/_graph/__init__.py +0 -0
  13. {cbrkit-0.2.0 → cbrkit-0.3.0}/LICENSE +0 -0
  14. {cbrkit-0.2.0 → cbrkit-0.3.0}/README.md +0 -0
  15. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/__init__.py +0 -0
  16. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/__main__.py +0 -0
  17. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/api.py +0 -0
  18. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/cli.py +0 -0
  19. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/global_sim/_aggregate.py +0 -0
  20. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/py.typed +0 -0
  21. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/__init__.py +0 -0
  22. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/_helpers.py +0 -0
  23. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/collections.py +0 -0
  24. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/generic.py +0 -0
  25. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/numeric.py +0 -0
  26. {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/strings.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cbrkit
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI.
5
5
  Home-page: https://wi2trier.github.io/cbrkit/
6
6
  License: MIT
@@ -1,3 +1,4 @@
1
+ from . import graph
1
2
  from ._aggregate import PoolingName, aggregator
2
3
  from ._attribute_value import AttributeValueData, AttributeValueSim, attribute_value
3
4
 
@@ -7,4 +8,5 @@ __all__ = [
7
8
  "AttributeValueSim",
8
9
  "aggregator",
9
10
  "PoolingName",
11
+ "graph",
10
12
  ]
@@ -8,9 +8,9 @@ import pandas as pd
8
8
  from cbrkit.sim import sim2map
9
9
  from cbrkit.typing import (
10
10
  AggregatorFunc,
11
- AnnotatedFloat,
12
11
  AnySimFunc,
13
12
  Casebase,
13
+ FloatProtocol,
14
14
  KeyType,
15
15
  SimMap,
16
16
  SimMapFunc,
@@ -43,8 +43,8 @@ def _value_getter(obj: AttributeValueData, key: Any) -> Any:
43
43
  return getattr(obj, key)
44
44
 
45
45
 
46
- @dataclass(frozen=True)
47
- class AttributeValueSim(AnnotatedFloat, Generic[SimType]):
46
+ @dataclass(slots=True, frozen=True)
47
+ class AttributeValueSim(FloatProtocol, Generic[SimType]):
48
48
  value: float
49
49
  by_attribute: Mapping[str, SimType]
50
50
 
@@ -0,0 +1,25 @@
1
+ from ._astar import GraphMapping, GraphSim, astar
2
+ from ._model import (
3
+ EdgeData,
4
+ EdgeKey,
5
+ EdgeProtocol,
6
+ Graph,
7
+ GraphData,
8
+ NodeData,
9
+ NodeKey,
10
+ NodeProtocol,
11
+ )
12
+
13
+ __all__ = [
14
+ "NodeKey",
15
+ "NodeData",
16
+ "EdgeKey",
17
+ "EdgeData",
18
+ "GraphData",
19
+ "EdgeProtocol",
20
+ "NodeProtocol",
21
+ "Graph",
22
+ "GraphMapping",
23
+ "GraphSim",
24
+ "astar",
25
+ ]
@@ -8,7 +8,7 @@ from collections.abc import Iterable
8
8
  from dataclasses import dataclass, field
9
9
  from typing import Any, Generic, cast
10
10
 
11
- from cbrkit.global_sim._graph.model import (
11
+ from cbrkit.global_sim.graph._model import (
12
12
  EdgeData,
13
13
  EdgeKey,
14
14
  Graph,
@@ -17,12 +17,12 @@ from cbrkit.global_sim._graph.model import (
17
17
  NodeKey,
18
18
  )
19
19
  from cbrkit.sim._helpers import unpack_sims
20
- from cbrkit.typing import Casebase, KeyType, SimPairFunc, SimType
20
+ from cbrkit.typing import Casebase, FloatProtocol, KeyType, SimPairFunc, SimType
21
21
 
22
22
  logger = logging.getLogger(__name__)
23
23
 
24
24
 
25
- @dataclass
25
+ @dataclass(slots=True)
26
26
  class GraphMapping(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
27
27
  """Store all mappings and perform integrity checks on them"""
28
28
 
@@ -107,7 +107,7 @@ class GraphMapping(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
107
107
  self.edge_mappings[x] = y
108
108
 
109
109
 
110
- @dataclass
110
+ @dataclass(slots=True)
111
111
  class SearchNode(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
112
112
  """Specific search node"""
113
113
 
@@ -136,17 +136,21 @@ class SearchNode(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
136
136
  self.edges.remove(cast(EdgeKey, q))
137
137
 
138
138
 
139
- def run(
139
+ @dataclass(slots=True, frozen=True)
140
+ class GraphSim(FloatProtocol, Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
141
+ value: float
142
+ mapping: GraphMapping[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]
143
+
144
+
145
+ def astar(
140
146
  x_map: Casebase[KeyType, Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]],
141
147
  y: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
142
148
  node_sim_func: SimPairFunc[NodeData, SimType],
143
149
  edge_sim_func: SimPairFunc[EdgeData, SimType],
144
150
  queue_limit: int,
145
- ) -> dict[KeyType, float]:
146
- similarities: dict[KeyType, float] = {}
147
-
151
+ ) -> dict[KeyType, GraphSim[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]]:
148
152
  results = {
149
- key: astar_search(
153
+ key: _astar_single(
150
154
  x,
151
155
  y,
152
156
  node_sim_func,
@@ -156,15 +160,17 @@ def run(
156
160
  for key, x in x_map.items()
157
161
  }
158
162
 
159
- for key, result in results.items():
160
- similarities[key] = g(result, node_sim_func, edge_sim_func)
161
- # TODO: Add mapping to similarity
162
-
163
- return similarities
163
+ return {
164
+ key: GraphSim(
165
+ g(result, node_sim_func, edge_sim_func),
166
+ result.mapping,
167
+ )
168
+ for key, result in results.items()
169
+ }
164
170
 
165
171
 
166
172
  # According to Bergmann and Gil, 2014
167
- def astar_search(
173
+ def _astar_single(
168
174
  x: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
169
175
  y: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
170
176
  node_sim_func: SimPairFunc[NodeData, SimType],
@@ -172,17 +178,13 @@ def astar_search(
172
178
  queue_limit: int,
173
179
  ):
174
180
  """Perform an A* analysis of the x base and the y"""
175
- q: list[SearchNode[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]] = []
176
181
  s0 = SearchNode(GraphMapping(x, y))
177
-
178
- bisect.insort(q, s0, key=lambda x: x.f)
182
+ q = [s0]
179
183
 
180
184
  while q[-1].nodes or q[-1].edges:
181
185
  q = _expand(q, x, y, node_sim_func, edge_sim_func, queue_limit)
182
186
 
183
- best_q = q[-1]
184
-
185
- return best_q
187
+ return q[-1]
186
188
 
187
189
 
188
190
  def _expand(
@@ -197,7 +199,7 @@ def _expand(
197
199
 
198
200
  s = q[-1]
199
201
  mapped = False
200
- query_obj, iterator = select1(s, x, y)
202
+ query_obj, iterator = select1(s, x)
201
203
 
202
204
  if query_obj and iterator:
203
205
  for case_obj in iterator:
@@ -221,11 +223,9 @@ def _expand(
221
223
  return q[len(q) - queue_limit :] if queue_limit > 0 else q
222
224
 
223
225
 
224
- # TODO: Check node types here
225
226
  def select1(
226
227
  s: SearchNode[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
227
228
  x: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
228
- y: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
229
229
  ) -> tuple[
230
230
  NodeKey | EdgeKey | None,
231
231
  Iterable[NodeKey | EdgeKey] | None,
@@ -19,7 +19,7 @@ class NodeProtocol(Hashable, Protocol[NodeData]):
19
19
  data: NodeData
20
20
 
21
21
 
22
- @dataclass
22
+ @dataclass(slots=True)
23
23
  class Graph(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
24
24
  nodes: dict[NodeKey, NodeProtocol[NodeData]]
25
25
  edges: dict[EdgeKey, EdgeProtocol[EdgeData, NodeKey]]
@@ -53,6 +53,8 @@ def python(import_name: str) -> Any:
53
53
 
54
54
 
55
55
  class DataFrameCasebase(abc.Mapping):
56
+ __slots__ = ("df",)
57
+
56
58
  df: DataFrame
57
59
 
58
60
  def __init__(self, df: DataFrame) -> None:
@@ -96,9 +98,16 @@ def _csv_pandas(path: FilePath) -> dict[int, pd.Series]:
96
98
  return cast(dict[int, pd.Series], dataframe(df))
97
99
 
98
100
 
99
- def json(path: FilePath) -> dict[str, Any]:
101
+ def json(path: FilePath) -> dict[Any, Any]:
100
102
  with open(path, "rb") as fp:
101
- return orjson.loads(fp.read())
103
+ data = orjson.loads(fp.read())
104
+
105
+ if isinstance(data, list):
106
+ return dict(enumerate(data))
107
+ elif isinstance(data, dict):
108
+ return data
109
+ else:
110
+ raise TypeError(f"Invalid data type: {type(data)}")
102
111
 
103
112
 
104
113
  def toml(path: FilePath) -> dict[str, Any]:
@@ -106,12 +115,18 @@ def toml(path: FilePath) -> dict[str, Any]:
106
115
  return tomllib.load(fp)
107
116
 
108
117
 
109
- def yaml(path: FilePath) -> dict[str, Any]:
110
- data: dict[str, Any] = {}
118
+ def yaml(path: FilePath) -> dict[Any, Any]:
119
+ data: dict[Any, Any] = {}
111
120
 
112
121
  with open(path, "rb") as fp:
113
- for doc in yamllib.safe_load_all(fp):
114
- data |= doc
122
+ for doc_idx, doc in enumerate(yamllib.safe_load_all(fp)):
123
+ if isinstance(doc, list):
124
+ for idx, item in enumerate(doc):
125
+ data[doc_idx + idx] = item
126
+ elif isinstance(doc, dict):
127
+ data |= doc
128
+ else:
129
+ raise TypeError(f"Invalid document type: {type(doc)}")
115
130
 
116
131
  return data
117
132
 
@@ -29,7 +29,7 @@ def _similarities2ranking(
29
29
  return sorted(sim_map, key=lambda key: unpack_sim(sim_map[key]), reverse=True)
30
30
 
31
31
 
32
- @dataclass
32
+ @dataclass(slots=True)
33
33
  class _Result(Generic[KeyType, ValueType, SimType]):
34
34
  similarities: SimMap[KeyType, SimType]
35
35
  ranking: list[KeyType]
@@ -47,9 +47,9 @@ class _Result(Generic[KeyType, ValueType, SimType]):
47
47
  return cls(similarities=similarities, ranking=ranking, casebase=casebase)
48
48
 
49
49
 
50
- @dataclass
50
+ @dataclass(slots=True)
51
51
  class Result(Generic[KeyType, ValueType, SimType]):
52
- _final: _Result[KeyType, ValueType, SimType]
52
+ final: _Result[KeyType, ValueType, SimType]
53
53
  intermediate: list[_Result[KeyType, ValueType, SimType]]
54
54
 
55
55
  def __init__(
@@ -11,7 +11,7 @@ class SerializedNode(TypedDict, total=False):
11
11
  children: list["SerializedNode | str"]
12
12
 
13
13
 
14
- @dataclass
14
+ @dataclass(slots=True)
15
15
  class TaxonomyNode:
16
16
  key: str
17
17
  weight: float | None
@@ -21,6 +21,8 @@ class TaxonomyNode:
21
21
 
22
22
 
23
23
  class Taxonomy:
24
+ __slots__ = ("root", "nodes")
25
+
24
26
  root: TaxonomyNode
25
27
  nodes: dict[str, TaxonomyNode]
26
28
 
@@ -6,11 +6,11 @@ from typing import (
6
6
  )
7
7
 
8
8
 
9
- class AnnotatedFloat(Protocol):
9
+ class FloatProtocol(Protocol):
10
10
  value: float
11
11
 
12
12
 
13
- AnyFloat = float | AnnotatedFloat
13
+ AnyFloat = float | FloatProtocol
14
14
 
15
15
  FilePath = str | Path
16
16
  KeyType = TypeVar("KeyType")
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "cbrkit"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  description = "Customizable Case-Based Reasoning (CBR) toolkit for Python with a built-in API and CLI."
5
5
  authors = ["Mirko Lenz <mirko@mirkolenz.com>"]
6
6
  license = "MIT"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes