alpha-avatar-plugins-memory 0.6.2__tar.gz → 0.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/PKG-INFO +1 -1
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/graph/__init__.py +24 -0
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/graph/graph_alias.py +144 -0
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/graph/graph_builder.py +230 -0
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/graph/graph_lookup.py +232 -0
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/graph/graph_store.py +362 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/memory_markdown.py +47 -17
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/memory_op.py +38 -12
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/memory_prompts.py +576 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/memory_runtime.py +236 -395
- alpha_avatar_plugins_memory-0.6.3/alphaavatar/plugins/memory/runner/lancedb_runner.py +663 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/version.py +1 -1
- alpha_avatar_plugins_memory-0.6.2/alphaavatar/plugins/memory/memory_prompts.py +0 -560
- alpha_avatar_plugins_memory-0.6.2/alphaavatar/plugins/memory/runner/lancedb_runner.py +0 -240
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/.gitignore +0 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/README.md +0 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/__init__.py +0 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/log.py +0 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/runner/__init__.py +0 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/alphaavatar/plugins/memory/runner/qdrant_runner.py +0 -0
- {alpha_avatar_plugins_memory-0.6.2 → alpha_avatar_plugins_memory-0.6.3}/pyproject.toml +0 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Copyright 2026 AlphaAvatar project
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from .graph_alias import save_graph_aliases
|
|
15
|
+
from .graph_builder import build_graph_from_mentions
|
|
16
|
+
from .graph_lookup import GraphLookup
|
|
17
|
+
from .graph_store import save_memory_graph_stubs
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"save_graph_aliases",
|
|
21
|
+
"build_graph_from_mentions",
|
|
22
|
+
"GraphLookup",
|
|
23
|
+
"save_memory_graph_stubs",
|
|
24
|
+
]
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# Copyright 2026 AlphaAvatar project
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import json
|
|
15
|
+
import pathlib
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _json_dumps(value: Any) -> str:
|
|
20
|
+
return json.dumps(value, ensure_ascii=False, default=str)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _json_loads(line: str) -> dict[str, Any] | None:
|
|
24
|
+
try:
|
|
25
|
+
data = json.loads(line)
|
|
26
|
+
return data if isinstance(data, dict) else None
|
|
27
|
+
except Exception:
|
|
28
|
+
return None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _read_alias_map(path: pathlib.Path) -> dict[str, dict[str, Any]]:
    """Load alias rows from a JSONL file, keyed by their stripped alias key.

    A missing file yields an empty mapping. Blank lines, lines that do not
    decode to a non-empty JSON object, and rows lacking either ``alias_key``
    or ``canonical_key`` are ignored. When an alias key occurs more than once,
    the last row wins.
    """
    alias_rows: dict[str, dict[str, Any]] = {}
    if not path.exists():
        return alias_rows

    for raw_line in path.read_text(encoding="utf-8").splitlines():
        stripped = raw_line.strip()
        record = _json_loads(stripped) if stripped else None
        if not record:
            continue

        alias = str(record.get("alias_key", "")).strip()
        canonical = str(record.get("canonical_key", "")).strip()
        if alias and canonical:
            alias_rows[alias] = record

    return alias_rows
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _write_aliases(path: pathlib.Path, aliases: dict[str, dict[str, Any]]) -> None:
    """Write all alias rows to ``path`` as JSONL, one row per line.

    Parent directories are created when needed. Rows are ordered by
    ``canonical_key`` and then ``alias_key`` (both compared as strings). An
    empty mapping produces an empty file.
    """

    def _order(row: dict[str, Any]) -> tuple[str, str]:
        return str(row.get("canonical_key", "")), str(row.get("alias_key", ""))

    path.parent.mkdir(parents=True, exist_ok=True)

    ordered = list(aliases.values())
    ordered.sort(key=_order)

    # Every serialized row is terminated by a newline, including the last one.
    payload = "".join(f"{_json_dumps(row)}\n" for row in ordered)
    path.write_text(payload, encoding="utf-8")
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _alias_text(row: dict[str, Any], field: str) -> str:
    """Return ``row[field]`` as stripped text; missing or ``None`` becomes ``""``."""
    value = row.get(field)
    return "" if value is None else str(value).strip()


def _alias_weight(row: dict[str, Any]) -> float:
    """Return ``row["weight"]`` as a float, or ``0.0`` when absent or not numeric."""
    try:
        return float(row.get("weight", 0.0))
    except (TypeError, ValueError):
        return 0.0


def save_graph_aliases(
    *,
    graph_path: str | pathlib.Path,
    aliases: list[dict[str, Any]],
) -> dict[str, Any]:
    """Merge ``aliases`` into ``<graph_path>/aliases.jsonl`` and rewrite the file.

    Args:
        graph_path: Directory that holds (or will hold) ``aliases.jsonl``.
        aliases: Rows carrying at least ``alias_key`` and ``canonical_key``;
            an optional ``weight`` is used to arbitrate conflicts.

    Returns:
        A summary dict with the aliases file path (``aliases_file``), the total
        number of stored aliases (``aliases``), the ``updated`` and ``skipped``
        counters, and a ``conflicts`` list describing rejected rows.

    A row is skipped when it is not a dict, when either key is empty, when the
    alias equals its canonical key, when a ``face:``/``voice:``/``object:``
    alias key is not yet scoped with ``:local:``, or when it would remap an
    existing alias to a different canonical key with a strictly lower weight.
    """
    graph_path = pathlib.Path(graph_path)
    aliases_path = graph_path / "aliases.jsonl"

    alias_map = _read_alias_map(aliases_path)

    updated = 0
    skipped = 0
    conflicts: list[dict[str, Any]] = []

    for alias in aliases:
        if not isinstance(alias, dict):
            skipped += 1
            continue

        # ``None`` keys are treated as missing instead of the literal "None".
        alias_key = _alias_text(alias, "alias_key")
        canonical_key = _alias_text(alias, "canonical_key")

        if not alias_key or not canonical_key or alias_key == canonical_key:
            skipped += 1
            continue

        # local detector keys must already be scoped before aliasing
        if alias_key.startswith(("face:", "voice:", "object:")) and ":local:" not in alias_key:
            skipped += 1
            conflicts.append(
                {
                    "alias_key": alias_key,
                    "canonical_key": canonical_key,
                    "reason": "unscoped_local_key",
                }
            )
            continue

        old = alias_map.get(alias_key)

        if old:
            old_canonical = _alias_text(old, "canonical_key")
            # A null or non-numeric weight counts as 0.0 rather than aborting
            # the whole save.
            old_weight = _alias_weight(old)
            new_weight = _alias_weight(alias)

            # On a canonical-key conflict the existing row is kept only when
            # the new weight is strictly lower; an equal weight replaces it.
            if old_canonical and old_canonical != canonical_key and new_weight < old_weight:
                conflicts.append(
                    {
                        "alias_key": alias_key,
                        "old_canonical_key": old_canonical,
                        "new_canonical_key": canonical_key,
                        "old_weight": old_weight,
                        "new_weight": new_weight,
                        "reason": "lower_confidence_conflict",
                    }
                )
                skipped += 1
                continue

            # New fields override old ones; old-only fields are preserved.
            alias_map[alias_key] = {**old, **alias}
        else:
            # Store a copy so later mutation of the caller's dict cannot leak
            # into the map.
            alias_map[alias_key] = dict(alias)

        updated += 1

    _write_aliases(aliases_path, alias_map)

    return {
        "aliases_file": str(aliases_path),
        "aliases": len(alias_map),
        "updated": updated,
        "skipped": skipped,
        "conflicts": conflicts,
    }
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
# Copyright 2026 AlphaAvatar project
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import hashlib
|
|
15
|
+
from collections.abc import Iterable
|
|
16
|
+
from itertools import combinations
|
|
17
|
+
|
|
18
|
+
from alphaavatar.agents.memory import MemoryItem
|
|
19
|
+
from alphaavatar.agents.memory.schema.graph import (
|
|
20
|
+
GraphNodeMention,
|
|
21
|
+
MemoryGraphLink,
|
|
22
|
+
MemoryGraphNode,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Node types whose keys are rewritten into the session-scoped
# "<type>:local:<session_id>:<id>" form by the key helpers in this module.
LOCAL_NODE_TYPES = {
    "face",
    "voice",
    "speaker",
    "object",
}

# Key prefixes that mark a key as global; such keys are used as-is.
GLOBAL_KEY_PREFIXES = (
    "user:", "tool:", "project:",
    "concept:", "artifact:", "image:",
    "audio:", "turn:", "memory_item:",
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _is_global_key(key: str) -> bool:
    """Report whether ``key`` starts with one of ``GLOBAL_KEY_PREFIXES``."""
    for prefix in GLOBAL_KEY_PREFIXES:
        if key.startswith(prefix):
            return True
    return False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _is_scoped_local_key(key: str) -> bool:
|
|
45
|
+
return ":local:" in key
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _normalize_node_type(node_type: str | None) -> str:
|
|
49
|
+
value = str(node_type or "text").strip().lower()
|
|
50
|
+
if value == "speaker":
|
|
51
|
+
return "voice"
|
|
52
|
+
return value
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _norm_text(value: str) -> str:
|
|
56
|
+
return " ".join(str(value or "").strip().lower().split())
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _scope_local_key(
    *,
    key: str,
    node_type: str,
    session_id: str,
) -> tuple[str, str]:
    """Turn a raw mention key into a graph key plus its scope label.

    Returns:
        ``(graph_key, key_scope)`` where ``key_scope`` is ``"global"``,
        ``"session"``, or ``"unknown"`` (only for an empty key, in which case
        the graph key is ``""``).

    Keys that already start with a global prefix or already contain
    ``:local:`` are returned unchanged. Otherwise, keys of local node types
    are rewritten to ``<type>:local:<session_id>:<local_id>``, where
    ``local_id`` is the text after the first ``:`` (or the whole key when it
    has no ``:``). Every other key is returned unchanged as global.
    """
    cleaned_key = str(key or "").strip()
    if not cleaned_key:
        return "", "unknown"

    if _is_global_key(cleaned_key):
        return cleaned_key, "global"
    if _is_scoped_local_key(cleaned_key):
        return cleaned_key, "session"

    kind = _normalize_node_type(node_type)
    if kind not in LOCAL_NODE_TYPES:
        return cleaned_key, "global"

    # e.g. "face:tmp_1" -> "tmp_1", "voice:speaker_1" -> "speaker_1",
    # "tmp_1" -> "tmp_1"
    _, separator, remainder = cleaned_key.partition(":")
    local_id = remainder if separator else cleaned_key

    return f"{kind}:local:{session_id}:{local_id}", "session"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _stable_key(*, node_type: str, content: str) -> str:
    """Derive a deterministic ``<node_type>:<digest>`` key from the content.

    The digest is the first 16 hex characters of the SHA-256 of
    ``"<node_type>:<normalized content>"``.
    """
    seed = "{}:{}".format(node_type, _norm_text(content))
    hasher = hashlib.sha256()
    hasher.update(seed.encode("utf-8", errors="ignore"))
    short_digest = hasher.hexdigest()[:16]
    return f"{node_type}:{short_digest}"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def normalize_mention(
    mention: GraphNodeMention,
    *,
    session_id: str,
) -> MemoryGraphNode | None:
    """Convert a raw mention into a graph node, or ``None`` if it has no content.

    When the mention carries a key, it is scoped via ``_scope_local_key`` and
    the original key is recorded under ``extra_data["raw_key"]``. Without a
    key, one is derived from the content hash: session-scoped for local node
    types, global otherwise. ``extra_data["key_scope"]`` always holds the
    resulting scope.
    """
    text = str(mention.content or "").strip()
    if not text:
        return None

    kind = _normalize_node_type(mention.type)
    raw_key = str(mention.key or "").strip()

    if raw_key:
        key, key_scope = _scope_local_key(
            key=raw_key,
            node_type=kind,
            session_id=session_id,
        )
        extra_data = {"raw_key": raw_key, "key_scope": key_scope}
    elif kind in LOCAL_NODE_TYPES:
        seed = f"{kind}:{_norm_text(text)}"
        digest = hashlib.sha256(seed.encode("utf-8", errors="ignore")).hexdigest()[:16]
        key, key_scope = f"{kind}:local:{session_id}:{digest}", "session"
        extra_data = {"key_scope": key_scope}
    else:
        key, key_scope = _stable_key(node_type=kind, content=text), "global"
        extra_data = {"key_scope": key_scope}

    return MemoryGraphNode(
        key=key,
        type=kind,
        content=text,
        weight=mention.weight,
        extra_data=extra_data,
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def build_memory_item_node(item: MemoryItem) -> MemoryGraphNode:
    """Build the graph node that represents the memory item itself.

    The node is keyed ``memory_item:<memory_id>``, has type ``"text"`` and
    weight ``1.0``, and carries the item's identifying fields in
    ``extra_data`` with ``node_kind`` set to ``"memory_item"``.
    """
    memory_id = item.memory_id
    metadata = {
        "node_kind": "memory_item",
        "memory_id": memory_id,
        "session_id": item.session_id,
        "object_ids": item.object_ids,
        "memory_type": str(item.memory_type),
        "topic": item.topic,
        "timestamp": item.timestamp,
    }
    return MemoryGraphNode(
        key=f"memory_item:{memory_id}",
        type="text",
        content=item.value,
        weight=1.0,
        extra_data=metadata,
    )
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _is_memory_item_node(node: MemoryGraphNode) -> bool:
|
|
162
|
+
return (node.extra_data or {}).get("node_kind") == "memory_item"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def build_graph_from_mentions(
    *,
    item: MemoryItem,
    mentions: Iterable[GraphNodeMention],
    max_pair_nodes: int = 12,
) -> tuple[list[MemoryGraphNode], list[MemoryGraphLink]]:
    """Build the nodes and links contributed by one memory item.

    Args:
        item: The memory item; it becomes its own node.
        mentions: Raw mentions extracted for the item. Mentions without
            content are dropped; mentions that normalize to the same key are
            merged (maximum weight, ``extra_data`` updated by later mentions).
        max_pair_nodes: Only the first ``max_pair_nodes`` mention nodes (in
            first-seen order) take part in pairwise co-occurrence links, which
            bounds the number of such links. Values below zero are treated as
            zero. Defaults to 12.

    Returns:
        ``(nodes, links)``. ``nodes`` starts with the memory item node. ``links``
        holds one ``memory_item_contains_node`` link per mention node followed
        by the ``co_occurs_in_memory_item`` links.
    """
    item_node = build_memory_item_node(item)

    nodes_by_key: dict[str, MemoryGraphNode] = {item_node.key: item_node}

    for mention in mentions:
        node = normalize_mention(mention, session_id=item.session_id)
        if node is None:
            continue

        existing = nodes_by_key.get(node.key)
        if existing is None:
            nodes_by_key[node.key] = node
        else:
            existing.weight = max(existing.weight, node.weight)
            existing.extra_data.update(node.extra_data)

    nodes = list(nodes_by_key.values())
    mention_nodes = [node for node in nodes if not _is_memory_item_node(node)]

    def _link(
        source: MemoryGraphNode,
        target: MemoryGraphNode,
        weight: float,
        link_kind: str,
    ) -> MemoryGraphLink:
        # Both link kinds share the same provenance fields of the memory item.
        return MemoryGraphLink(
            source_id=source.id,
            target_id=target.id,
            source_key=source.key,
            target_key=target.key,
            weight=weight,
            extra_data={
                "memory_id": item.memory_id,
                "session_id": item.session_id,
                "object_ids": item.object_ids,
                "link_kind": link_kind,
            },
        )

    # memory item -> node
    links: list[MemoryGraphLink] = [
        _link(item_node, node, node.weight, "memory_item_contains_node")
        for node in mention_nodes
    ]

    # node -> node weak co-occurrence links, weighted by the weaker endpoint
    pair_pool = mention_nodes[: max(0, max_pair_nodes)]
    links.extend(
        _link(left, right, min(left.weight, right.weight), "co_occurs_in_memory_item")
        for left, right in combinations(pair_pool, 2)
    )

    return nodes, links
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
# Copyright 2026 AlphaAvatar project
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import json
|
|
15
|
+
import pathlib
|
|
16
|
+
from collections import defaultdict, deque
|
|
17
|
+
from typing import Any
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _json_loads(line: str) -> dict[str, Any] | None:
|
|
21
|
+
try:
|
|
22
|
+
data = json.loads(line)
|
|
23
|
+
return data if isinstance(data, dict) else None
|
|
24
|
+
except Exception:
|
|
25
|
+
return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _read_jsonl(path: pathlib.Path) -> list[dict[str, Any]]:
    """Read a JSONL file into a list of non-empty dict rows.

    A missing file yields an empty list. Blank lines and lines that do not
    decode to a non-empty JSON object are skipped.
    """
    if not path.exists():
        return []

    stripped_lines = (raw.strip() for raw in path.read_text(encoding="utf-8").splitlines())
    decoded_rows = (_json_loads(text) for text in stripped_lines if text)
    return [row for row in decoded_rows if row]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _norm_list(value: Any) -> list[str]:
|
|
46
|
+
if value is None:
|
|
47
|
+
return []
|
|
48
|
+
values = value if isinstance(value, list) else [value]
|
|
49
|
+
out: list[str] = []
|
|
50
|
+
seen: set[str] = set()
|
|
51
|
+
|
|
52
|
+
for x in values:
|
|
53
|
+
s = str(x).strip()
|
|
54
|
+
if not s or s in seen:
|
|
55
|
+
continue
|
|
56
|
+
seen.add(s)
|
|
57
|
+
out.append(s)
|
|
58
|
+
|
|
59
|
+
return out
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class GraphLookup:
|
|
63
|
+
def __init__(self, graph_path: str | pathlib.Path):
|
|
64
|
+
self.graph_path = pathlib.Path(graph_path)
|
|
65
|
+
|
|
66
|
+
self.nodes_path = self.graph_path / "nodes.jsonl"
|
|
67
|
+
self.links_path = self.graph_path / "links.jsonl"
|
|
68
|
+
self.node_items_path = self.graph_path / "node_items.jsonl"
|
|
69
|
+
self.aliases_path = self.graph_path / "aliases.jsonl"
|
|
70
|
+
|
|
71
|
+
self._aliases = self._load_aliases()
|
|
72
|
+
self._reverse_aliases = self._build_reverse_aliases()
|
|
73
|
+
self._neighbors = self._load_neighbors()
|
|
74
|
+
self._node_items = self._load_node_items()
|
|
75
|
+
|
|
76
|
+
def _load_aliases(self) -> dict[str, str]:
|
|
77
|
+
aliases: dict[str, str] = {}
|
|
78
|
+
|
|
79
|
+
for row in _read_jsonl(self.aliases_path):
|
|
80
|
+
alias_key = str(row.get("alias_key", "")).strip()
|
|
81
|
+
canonical_key = str(row.get("canonical_key", "")).strip()
|
|
82
|
+
|
|
83
|
+
if alias_key and canonical_key and alias_key != canonical_key:
|
|
84
|
+
aliases[alias_key] = canonical_key
|
|
85
|
+
|
|
86
|
+
return aliases
|
|
87
|
+
|
|
88
|
+
def _build_reverse_aliases(self) -> dict[str, list[str]]:
|
|
89
|
+
reverse: dict[str, list[str]] = defaultdict(list)
|
|
90
|
+
|
|
91
|
+
for alias_key, canonical_key in self._aliases.items():
|
|
92
|
+
reverse[canonical_key].append(alias_key)
|
|
93
|
+
|
|
94
|
+
return dict(reverse)
|
|
95
|
+
|
|
96
|
+
def _load_neighbors(self) -> dict[str, list[tuple[str, float]]]:
|
|
97
|
+
neighbors: dict[str, list[tuple[str, float]]] = defaultdict(list)
|
|
98
|
+
|
|
99
|
+
for row in _read_jsonl(self.links_path):
|
|
100
|
+
source_key = str(row.get("source_key", "")).strip()
|
|
101
|
+
target_key = str(row.get("target_key", "")).strip()
|
|
102
|
+
weight = float(row.get("weight", 1.0))
|
|
103
|
+
|
|
104
|
+
if not source_key or not target_key or source_key == target_key:
|
|
105
|
+
continue
|
|
106
|
+
|
|
107
|
+
neighbors[source_key].append((target_key, weight))
|
|
108
|
+
neighbors[target_key].append((source_key, weight))
|
|
109
|
+
|
|
110
|
+
return dict(neighbors)
|
|
111
|
+
|
|
112
|
+
def _load_node_items(self) -> dict[str, list[dict[str, Any]]]:
|
|
113
|
+
node_items: dict[str, list[dict[str, Any]]] = defaultdict(list)
|
|
114
|
+
|
|
115
|
+
for row in _read_jsonl(self.node_items_path):
|
|
116
|
+
node_key = str(row.get("node_key", "")).strip()
|
|
117
|
+
if not node_key:
|
|
118
|
+
continue
|
|
119
|
+
node_items[node_key].append(row)
|
|
120
|
+
|
|
121
|
+
return dict(node_items)
|
|
122
|
+
|
|
123
|
+
def resolve_keys(self, node_key: str) -> list[str]:
|
|
124
|
+
"""
|
|
125
|
+
Return canonical key + known aliases.
|
|
126
|
+
This avoids rewriting VDB immediately after merge.
|
|
127
|
+
"""
|
|
128
|
+
node_key = str(node_key).strip()
|
|
129
|
+
if not node_key:
|
|
130
|
+
return []
|
|
131
|
+
|
|
132
|
+
canonical = self._aliases.get(node_key, node_key)
|
|
133
|
+
|
|
134
|
+
keys = [canonical, node_key]
|
|
135
|
+
keys.extend(self._reverse_aliases.get(canonical, []))
|
|
136
|
+
|
|
137
|
+
out: list[str] = []
|
|
138
|
+
seen: set[str] = set()
|
|
139
|
+
for key in keys:
|
|
140
|
+
if key and key not in seen:
|
|
141
|
+
seen.add(key)
|
|
142
|
+
out.append(key)
|
|
143
|
+
|
|
144
|
+
return out
|
|
145
|
+
|
|
146
|
+
def expand_node_keys(
|
|
147
|
+
self,
|
|
148
|
+
*,
|
|
149
|
+
node_keys: list[str],
|
|
150
|
+
max_hops: int = 1,
|
|
151
|
+
max_neighbors_per_node: int = 16,
|
|
152
|
+
min_weight: float = 0.0,
|
|
153
|
+
) -> list[str]:
|
|
154
|
+
"""
|
|
155
|
+
Expand graph nodes through weak links.
|
|
156
|
+
|
|
157
|
+
max_hops=0: only resolved keys
|
|
158
|
+
max_hops=1: include direct neighbors
|
|
159
|
+
max_hops=2: include neighbors of neighbors
|
|
160
|
+
"""
|
|
161
|
+
start_keys: list[str] = []
|
|
162
|
+
for key in node_keys:
|
|
163
|
+
start_keys.extend(self.resolve_keys(key))
|
|
164
|
+
|
|
165
|
+
visited: set[str] = set()
|
|
166
|
+
ordered: list[str] = []
|
|
167
|
+
|
|
168
|
+
queue = deque((key, 0) for key in start_keys)
|
|
169
|
+
|
|
170
|
+
while queue:
|
|
171
|
+
key, hop = queue.popleft()
|
|
172
|
+
if key in visited:
|
|
173
|
+
continue
|
|
174
|
+
|
|
175
|
+
visited.add(key)
|
|
176
|
+
ordered.append(key)
|
|
177
|
+
|
|
178
|
+
if hop >= max_hops:
|
|
179
|
+
continue
|
|
180
|
+
|
|
181
|
+
neighbors = sorted(
|
|
182
|
+
self._neighbors.get(key, []),
|
|
183
|
+
key=lambda x: x[1],
|
|
184
|
+
reverse=True,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
for neighbor_key, weight in neighbors[:max_neighbors_per_node]:
|
|
188
|
+
if weight < min_weight:
|
|
189
|
+
continue
|
|
190
|
+
if neighbor_key not in visited:
|
|
191
|
+
queue.append((neighbor_key, hop + 1))
|
|
192
|
+
|
|
193
|
+
return ordered
|
|
194
|
+
|
|
195
|
+
def find_memory_ids_by_node_keys(
|
|
196
|
+
self,
|
|
197
|
+
*,
|
|
198
|
+
node_keys: list[str],
|
|
199
|
+
object_ids: list[str] | None = None,
|
|
200
|
+
session_id: str | None = None,
|
|
201
|
+
memory_type: str | None = None,
|
|
202
|
+
limit: int = 100,
|
|
203
|
+
) -> list[str]:
|
|
204
|
+
wanted_objects = set(_norm_list(object_ids))
|
|
205
|
+
|
|
206
|
+
out: list[str] = []
|
|
207
|
+
seen: set[str] = set()
|
|
208
|
+
|
|
209
|
+
for node_key in node_keys:
|
|
210
|
+
for row in self._node_items.get(node_key, []):
|
|
211
|
+
if session_id and str(row.get("session_id", "")) != session_id:
|
|
212
|
+
continue
|
|
213
|
+
|
|
214
|
+
if memory_type and str(row.get("memory_type", "")) != memory_type:
|
|
215
|
+
continue
|
|
216
|
+
|
|
217
|
+
if wanted_objects:
|
|
218
|
+
row_objects = set(_norm_list(row.get("object_ids")))
|
|
219
|
+
if not (wanted_objects & row_objects):
|
|
220
|
+
continue
|
|
221
|
+
|
|
222
|
+
memory_id = str(row.get("memory_id", "")).strip()
|
|
223
|
+
if not memory_id or memory_id in seen:
|
|
224
|
+
continue
|
|
225
|
+
|
|
226
|
+
seen.add(memory_id)
|
|
227
|
+
out.append(memory_id)
|
|
228
|
+
|
|
229
|
+
if len(out) >= limit:
|
|
230
|
+
return out
|
|
231
|
+
|
|
232
|
+
return out
|