cbrkit 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cbrkit-0.2.0 → cbrkit-0.3.0}/PKG-INFO +1 -1
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/global_sim/__init__.py +2 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/global_sim/_attribute_value.py +3 -3
- cbrkit-0.3.0/cbrkit/global_sim/graph/__init__.py +25 -0
- cbrkit-0.2.0/cbrkit/global_sim/_graph/astar.py → cbrkit-0.3.0/cbrkit/global_sim/graph/_astar.py +24 -24
- cbrkit-0.2.0/cbrkit/global_sim/_graph/model.py → cbrkit-0.3.0/cbrkit/global_sim/graph/_model.py +1 -1
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/loaders.py +21 -6
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/retrieval.py +3 -3
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/taxonomy.py +3 -1
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/typing.py +2 -2
- {cbrkit-0.2.0 → cbrkit-0.3.0}/pyproject.toml +1 -1
- cbrkit-0.2.0/cbrkit/global_sim/_graph/__init__.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/LICENSE +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/README.md +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/__init__.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/__main__.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/api.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/cli.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/global_sim/_aggregate.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/py.typed +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/__init__.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/_helpers.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/collections.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/generic.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/numeric.py +0 -0
- {cbrkit-0.2.0 → cbrkit-0.3.0}/cbrkit/sim/strings.py +0 -0
|
@@ -8,9 +8,9 @@ import pandas as pd
|
|
|
8
8
|
from cbrkit.sim import sim2map
|
|
9
9
|
from cbrkit.typing import (
|
|
10
10
|
AggregatorFunc,
|
|
11
|
-
AnnotatedFloat,
|
|
12
11
|
AnySimFunc,
|
|
13
12
|
Casebase,
|
|
13
|
+
FloatProtocol,
|
|
14
14
|
KeyType,
|
|
15
15
|
SimMap,
|
|
16
16
|
SimMapFunc,
|
|
@@ -43,8 +43,8 @@ def _value_getter(obj: AttributeValueData, key: Any) -> Any:
|
|
|
43
43
|
return getattr(obj, key)
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
@dataclass(frozen=True)
|
|
47
|
-
class AttributeValueSim(AnnotatedFloat, Generic[SimType]):
|
|
46
|
+
@dataclass(slots=True, frozen=True)
|
|
47
|
+
class AttributeValueSim(FloatProtocol, Generic[SimType]):
|
|
48
48
|
value: float
|
|
49
49
|
by_attribute: Mapping[str, SimType]
|
|
50
50
|
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from ._astar import GraphMapping, GraphSim, astar
|
|
2
|
+
from ._model import (
|
|
3
|
+
EdgeData,
|
|
4
|
+
EdgeKey,
|
|
5
|
+
EdgeProtocol,
|
|
6
|
+
Graph,
|
|
7
|
+
GraphData,
|
|
8
|
+
NodeData,
|
|
9
|
+
NodeKey,
|
|
10
|
+
NodeProtocol,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"NodeKey",
|
|
15
|
+
"NodeData",
|
|
16
|
+
"EdgeKey",
|
|
17
|
+
"EdgeData",
|
|
18
|
+
"GraphData",
|
|
19
|
+
"EdgeProtocol",
|
|
20
|
+
"NodeProtocol",
|
|
21
|
+
"Graph",
|
|
22
|
+
"GraphMapping",
|
|
23
|
+
"GraphSim",
|
|
24
|
+
"astar",
|
|
25
|
+
]
|
cbrkit-0.2.0/cbrkit/global_sim/_graph/astar.py → cbrkit-0.3.0/cbrkit/global_sim/graph/_astar.py
RENAMED
|
@@ -8,7 +8,7 @@ from collections.abc import Iterable
|
|
|
8
8
|
from dataclasses import dataclass, field
|
|
9
9
|
from typing import Any, Generic, cast
|
|
10
10
|
|
|
11
|
-
from cbrkit.global_sim._graph.model import (
|
|
11
|
+
from cbrkit.global_sim.graph._model import (
|
|
12
12
|
EdgeData,
|
|
13
13
|
EdgeKey,
|
|
14
14
|
Graph,
|
|
@@ -17,12 +17,12 @@ from cbrkit.global_sim._graph.model import (
|
|
|
17
17
|
NodeKey,
|
|
18
18
|
)
|
|
19
19
|
from cbrkit.sim._helpers import unpack_sims
|
|
20
|
-
from cbrkit.typing import Casebase, KeyType, SimPairFunc, SimType
|
|
20
|
+
from cbrkit.typing import Casebase, FloatProtocol, KeyType, SimPairFunc, SimType
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
@dataclass
|
|
25
|
+
@dataclass(slots=True)
|
|
26
26
|
class GraphMapping(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
|
|
27
27
|
"""Store all mappings and perform integrity checks on them"""
|
|
28
28
|
|
|
@@ -107,7 +107,7 @@ class GraphMapping(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
|
|
|
107
107
|
self.edge_mappings[x] = y
|
|
108
108
|
|
|
109
109
|
|
|
110
|
-
@dataclass
|
|
110
|
+
@dataclass(slots=True)
|
|
111
111
|
class SearchNode(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
|
|
112
112
|
"""Specific search node"""
|
|
113
113
|
|
|
@@ -136,17 +136,21 @@ class SearchNode(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
|
|
|
136
136
|
self.edges.remove(cast(EdgeKey, q))
|
|
137
137
|
|
|
138
138
|
|
|
139
|
-
def run(
|
|
139
|
+
@dataclass(slots=True, frozen=True)
|
|
140
|
+
class GraphSim(FloatProtocol, Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
|
|
141
|
+
value: float
|
|
142
|
+
mapping: GraphMapping[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def astar(
|
|
140
146
|
x_map: Casebase[KeyType, Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]],
|
|
141
147
|
y: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
|
|
142
148
|
node_sim_func: SimPairFunc[NodeData, SimType],
|
|
143
149
|
edge_sim_func: SimPairFunc[EdgeData, SimType],
|
|
144
150
|
queue_limit: int,
|
|
145
|
-
) -> dict[KeyType, float]:
|
|
146
|
-
similarities: dict[KeyType, float] = {}
|
|
147
|
-
|
|
151
|
+
) -> dict[KeyType, GraphSim[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]]:
|
|
148
152
|
results = {
|
|
149
|
-
key: astar_search(
|
|
153
|
+
key: _astar_single(
|
|
150
154
|
x,
|
|
151
155
|
y,
|
|
152
156
|
node_sim_func,
|
|
@@ -156,15 +160,17 @@ def run(
|
|
|
156
160
|
for key, x in x_map.items()
|
|
157
161
|
}
|
|
158
162
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
163
|
+
return {
|
|
164
|
+
key: GraphSim(
|
|
165
|
+
g(result, node_sim_func, edge_sim_func),
|
|
166
|
+
result.mapping,
|
|
167
|
+
)
|
|
168
|
+
for key, result in results.items()
|
|
169
|
+
}
|
|
164
170
|
|
|
165
171
|
|
|
166
172
|
# According to Bergmann and Gil, 2014
|
|
167
|
-
def astar_search(
|
|
173
|
+
def _astar_single(
|
|
168
174
|
x: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
|
|
169
175
|
y: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
|
|
170
176
|
node_sim_func: SimPairFunc[NodeData, SimType],
|
|
@@ -172,17 +178,13 @@ def astar_search(
|
|
|
172
178
|
queue_limit: int,
|
|
173
179
|
):
|
|
174
180
|
"""Perform an A* analysis of the x base and the y"""
|
|
175
|
-
q: list[SearchNode[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]] = []
|
|
176
181
|
s0 = SearchNode(GraphMapping(x, y))
|
|
177
|
-
|
|
178
|
-
bisect.insort(q, s0, key=lambda x: x.f)
|
|
182
|
+
q = [s0]
|
|
179
183
|
|
|
180
184
|
while q[-1].nodes or q[-1].edges:
|
|
181
185
|
q = _expand(q, x, y, node_sim_func, edge_sim_func, queue_limit)
|
|
182
186
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
return best_q
|
|
187
|
+
return q[-1]
|
|
186
188
|
|
|
187
189
|
|
|
188
190
|
def _expand(
|
|
@@ -197,7 +199,7 @@ def _expand(
|
|
|
197
199
|
|
|
198
200
|
s = q[-1]
|
|
199
201
|
mapped = False
|
|
200
|
-
query_obj, iterator = select1(s, x, y)
|
|
202
|
+
query_obj, iterator = select1(s, x)
|
|
201
203
|
|
|
202
204
|
if query_obj and iterator:
|
|
203
205
|
for case_obj in iterator:
|
|
@@ -221,11 +223,9 @@ def _expand(
|
|
|
221
223
|
return q[len(q) - queue_limit :] if queue_limit > 0 else q
|
|
222
224
|
|
|
223
225
|
|
|
224
|
-
# TODO: Check node types here
|
|
225
226
|
def select1(
|
|
226
227
|
s: SearchNode[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
|
|
227
228
|
x: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
|
|
228
|
-
y: Graph[GraphData, NodeKey, NodeData, EdgeKey, EdgeData],
|
|
229
229
|
) -> tuple[
|
|
230
230
|
NodeKey | EdgeKey | None,
|
|
231
231
|
Iterable[NodeKey | EdgeKey] | None,
|
cbrkit-0.2.0/cbrkit/global_sim/_graph/model.py → cbrkit-0.3.0/cbrkit/global_sim/graph/_model.py
RENAMED
|
@@ -19,7 +19,7 @@ class NodeProtocol(Hashable, Protocol[NodeData]):
|
|
|
19
19
|
data: NodeData
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
@dataclass
|
|
22
|
+
@dataclass(slots=True)
|
|
23
23
|
class Graph(Generic[GraphData, NodeKey, NodeData, EdgeKey, EdgeData]):
|
|
24
24
|
nodes: dict[NodeKey, NodeProtocol[NodeData]]
|
|
25
25
|
edges: dict[EdgeKey, EdgeProtocol[EdgeData, NodeKey]]
|
|
@@ -53,6 +53,8 @@ def python(import_name: str) -> Any:
|
|
|
53
53
|
|
|
54
54
|
|
|
55
55
|
class DataFrameCasebase(abc.Mapping):
|
|
56
|
+
__slots__ = ("df",)
|
|
57
|
+
|
|
56
58
|
df: DataFrame
|
|
57
59
|
|
|
58
60
|
def __init__(self, df: DataFrame) -> None:
|
|
@@ -96,9 +98,16 @@ def _csv_pandas(path: FilePath) -> dict[int, pd.Series]:
|
|
|
96
98
|
return cast(dict[int, pd.Series], dataframe(df))
|
|
97
99
|
|
|
98
100
|
|
|
99
|
-
def json(path: FilePath) -> dict[
|
|
101
|
+
def json(path: FilePath) -> dict[Any, Any]:
|
|
100
102
|
with open(path, "rb") as fp:
|
|
101
|
-
|
|
103
|
+
data = orjson.loads(fp.read())
|
|
104
|
+
|
|
105
|
+
if isinstance(data, list):
|
|
106
|
+
return dict(enumerate(data))
|
|
107
|
+
elif isinstance(data, dict):
|
|
108
|
+
return data
|
|
109
|
+
else:
|
|
110
|
+
raise TypeError(f"Invalid data type: {type(data)}")
|
|
102
111
|
|
|
103
112
|
|
|
104
113
|
def toml(path: FilePath) -> dict[str, Any]:
|
|
@@ -106,12 +115,18 @@ def toml(path: FilePath) -> dict[str, Any]:
|
|
|
106
115
|
return tomllib.load(fp)
|
|
107
116
|
|
|
108
117
|
|
|
109
|
-
def yaml(path: FilePath) -> dict[
|
|
110
|
-
data: dict[
|
|
118
|
+
def yaml(path: FilePath) -> dict[Any, Any]:
|
|
119
|
+
data: dict[Any, Any] = {}
|
|
111
120
|
|
|
112
121
|
with open(path, "rb") as fp:
|
|
113
|
-
for doc in yamllib.safe_load_all(fp):
|
|
114
|
-
|
|
122
|
+
for doc_idx, doc in enumerate(yamllib.safe_load_all(fp)):
|
|
123
|
+
if isinstance(doc, list):
|
|
124
|
+
for idx, item in enumerate(doc):
|
|
125
|
+
data[doc_idx + idx] = item
|
|
126
|
+
elif isinstance(doc, dict):
|
|
127
|
+
data |= doc
|
|
128
|
+
else:
|
|
129
|
+
raise TypeError(f"Invalid document type: {type(doc)}")
|
|
115
130
|
|
|
116
131
|
return data
|
|
117
132
|
|
|
@@ -29,7 +29,7 @@ def _similarities2ranking(
|
|
|
29
29
|
return sorted(sim_map, key=lambda key: unpack_sim(sim_map[key]), reverse=True)
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
@dataclass
|
|
32
|
+
@dataclass(slots=True)
|
|
33
33
|
class _Result(Generic[KeyType, ValueType, SimType]):
|
|
34
34
|
similarities: SimMap[KeyType, SimType]
|
|
35
35
|
ranking: list[KeyType]
|
|
@@ -47,9 +47,9 @@ class _Result(Generic[KeyType, ValueType, SimType]):
|
|
|
47
47
|
return cls(similarities=similarities, ranking=ranking, casebase=casebase)
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
@dataclass
|
|
50
|
+
@dataclass(slots=True)
|
|
51
51
|
class Result(Generic[KeyType, ValueType, SimType]):
|
|
52
|
-
|
|
52
|
+
final: _Result[KeyType, ValueType, SimType]
|
|
53
53
|
intermediate: list[_Result[KeyType, ValueType, SimType]]
|
|
54
54
|
|
|
55
55
|
def __init__(
|
|
@@ -11,7 +11,7 @@ class SerializedNode(TypedDict, total=False):
|
|
|
11
11
|
children: list["SerializedNode | str"]
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
@dataclass
|
|
14
|
+
@dataclass(slots=True)
|
|
15
15
|
class TaxonomyNode:
|
|
16
16
|
key: str
|
|
17
17
|
weight: float | None
|
|
@@ -21,6 +21,8 @@ class TaxonomyNode:
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
class Taxonomy:
|
|
24
|
+
__slots__ = ("root", "nodes")
|
|
25
|
+
|
|
24
26
|
root: TaxonomyNode
|
|
25
27
|
nodes: dict[str, TaxonomyNode]
|
|
26
28
|
|
|
@@ -6,11 +6,11 @@ from typing import (
|
|
|
6
6
|
)
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
class AnnotatedFloat(Protocol):
|
|
9
|
+
class FloatProtocol(Protocol):
|
|
10
10
|
value: float
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
AnyFloat = float | AnnotatedFloat
|
|
13
|
+
AnyFloat = float | FloatProtocol
|
|
14
14
|
|
|
15
15
|
FilePath = str | Path
|
|
16
16
|
KeyType = TypeVar("KeyType")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|