agmem 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/METADATA +338 -26
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/RECORD +32 -16
- memvcs/__init__.py +1 -1
- memvcs/cli.py +1 -1
- memvcs/coordinator/server.py +18 -2
- memvcs/core/agents.py +411 -0
- memvcs/core/archaeology.py +410 -0
- memvcs/core/collaboration.py +435 -0
- memvcs/core/compliance.py +427 -0
- memvcs/core/compression_metrics.py +248 -0
- memvcs/core/confidence.py +379 -0
- memvcs/core/daemon.py +735 -0
- memvcs/core/delta.py +45 -23
- memvcs/core/distiller.py +3 -12
- memvcs/core/fast_similarity.py +404 -0
- memvcs/core/federated.py +13 -2
- memvcs/core/gardener.py +8 -68
- memvcs/core/pack.py +1 -1
- memvcs/core/privacy_validator.py +187 -0
- memvcs/core/private_search.py +327 -0
- memvcs/core/protocol_builder.py +198 -0
- memvcs/core/search_index.py +538 -0
- memvcs/core/semantic_graph.py +388 -0
- memvcs/core/session.py +520 -0
- memvcs/core/timetravel.py +430 -0
- memvcs/integrations/mcp_server.py +775 -4
- memvcs/integrations/web_ui/server.py +424 -0
- memvcs/integrations/web_ui/websocket.py +223 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/WHEEL +0 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/entry_points.txt +0 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/top_level.txt +0 -0
memvcs/core/semantic_graph.py (new file)

@@ -0,0 +1,388 @@
"""
Semantic Memory Graph - Knowledge graph with relationships and embeddings.

This module provides:
- Memory node graph building
- Relationship inference
- Semantic clustering
- Graph-based search
"""

import hashlib
import json
import math
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

@dataclass
class MemoryNode:
    """A node in the semantic memory graph."""

    node_id: str
    path: str
    memory_type: str
    title: str
    content_hash: str
    created_at: str
    tags: List[str] = field(default_factory=list)
    embedding: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        return {
            "node_id": self.node_id,
            "path": self.path,
            "memory_type": self.memory_type,
            "title": self.title,
            "content_hash": self.content_hash,
            "created_at": self.created_at,
            "tags": self.tags,
        }


@dataclass
class MemoryEdge:
    """An edge between memory nodes."""

    source_id: str
    target_id: str
    relationship: str  # "references", "similar", "precedes", "related"
    weight: float = 1.0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        return {
            "source": self.source_id,
            "target": self.target_id,
            "relationship": self.relationship,
            "weight": self.weight,
        }

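Before the builder below, a minimal sketch of the two dataclasses in use (an editor's illustration, not part of the released file; the IDs, path, and date are made up). Note that to_dict() does not serialize the optional embedding on nodes or the metadata on edges:

node = MemoryNode(
    node_id="a1b2c3d4e5f60708",
    path="semantic/python-tips.md",
    memory_type="semantic",
    title="Python Tips",
    content_hash="a1b2c3d4e5f60708",
    created_at="2024-01-01T00:00:00+00:00",
    tags=["python", "tips"],
)
edge = MemoryEdge(node.node_id, "0807f6e5d4c3b2a1", "references")
print(json.dumps(node.to_dict()))  # embedding is not included in the payload
print(json.dumps(edge.to_dict()))  # metadata is not included in the payload
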
class SemanticGraphBuilder:
    """Builds a semantic graph from memory files."""

    def __init__(self, repo_root: Path):
        self.repo_root = Path(repo_root)
        self.nodes: Dict[str, MemoryNode] = {}
        self.edges: List[MemoryEdge] = []

    def build_graph(self) -> Tuple[List[MemoryNode], List[MemoryEdge]]:
        """Build the complete semantic graph."""
        from memvcs.core.repository import Repository

        try:
            repo = Repository(self.repo_root)
            current_dir = repo.current_dir

            # Build nodes
            for filepath in current_dir.rglob("*"):
                if filepath.is_file():
                    node = self._create_node(filepath, current_dir)
                    if node:
                        self.nodes[node.node_id] = node

            # Build edges
            self._infer_reference_edges()
            self._infer_similarity_edges()
            self._infer_temporal_edges()

            return list(self.nodes.values()), self.edges
        except Exception:
            return [], []

    def _create_node(self, filepath: Path, base_dir: Path) -> Optional[MemoryNode]:
        """Create a node from a file."""
        try:
            rel_path = str(filepath.relative_to(base_dir))
            content = filepath.read_text(encoding="utf-8", errors="replace")

            # Determine memory type
            memory_type = "unknown"
            for mt in ["episodic", "semantic", "procedural"]:
                if mt in filepath.parts:
                    memory_type = mt
                    break

            # Extract title
            title = filepath.stem
            for line in content.split("\n")[:5]:
                if line.startswith("# "):
                    title = line[2:].strip()
                    break

            # Extract tags from YAML frontmatter or content
            tags = self._extract_tags(content)

            content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
            mtime = datetime.fromtimestamp(filepath.stat().st_mtime, tz=timezone.utc).isoformat()

            return MemoryNode(
                node_id=content_hash,
                path=rel_path,
                memory_type=memory_type,
                title=title,
                content_hash=content_hash,
                created_at=mtime,
                tags=tags,
            )
        except Exception:
            return None

    def _extract_tags(self, content: str) -> List[str]:
        """Extract tags from content."""
        import re

        tags = set()

        # Look for YAML frontmatter tags
        if content.startswith("---"):
            match = re.search(r"tags:\s*\[([^\]]+)\]", content[:1000])
            if match:
                tags.update(t.strip().strip("'\"") for t in match.group(1).split(","))

        # Look for hashtags
        hashtags = re.findall(r"#(\w+)", content)
        tags.update(hashtags[:10])  # Limit hashtags

        return list(tags)[:20]

    def _infer_reference_edges(self) -> None:
        """Find explicit references between memories."""
        import re

        for node in self.nodes.values():
            try:
                filepath = self.repo_root / ".mem" / "current" / node.path
                if not filepath.exists():
                    filepath = self.repo_root / node.path
                if not filepath.exists():
                    continue

                content = filepath.read_text(encoding="utf-8", errors="replace")

                # Find markdown links
                links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", content)
                for _, target in links:
                    if not target.startswith("http"):
                        # Internal link
                        target_path = target.lstrip("./")
                        for other_node in self.nodes.values():
                            if (
                                other_node.path.endswith(target_path)
                                or target_path in other_node.path
                            ):
                                self.edges.append(
                                    MemoryEdge(
                                        source_id=node.node_id,
                                        target_id=other_node.node_id,
                                        relationship="references",
                                        weight=1.0,
                                    )
                                )
                                break
            except Exception:
                pass

    def _infer_similarity_edges(self) -> None:
        """Find similar memories based on tags."""
        nodes_list = list(self.nodes.values())

        for i, node1 in enumerate(nodes_list):
            for node2 in nodes_list[i + 1 :]:
                if node1.tags and node2.tags:
                    common_tags = set(node1.tags) & set(node2.tags)
                    if common_tags:
                        weight = len(common_tags) / max(len(node1.tags), len(node2.tags))
                        if weight >= 0.3:  # Threshold
                            self.edges.append(
                                MemoryEdge(
                                    source_id=node1.node_id,
                                    target_id=node2.node_id,
                                    relationship="similar",
                                    weight=weight,
                                    metadata={"common_tags": list(common_tags)},
                                )
                            )

    def _infer_temporal_edges(self) -> None:
        """Find temporal relationships between memories."""
        # Sort by creation time
        sorted_nodes = sorted(self.nodes.values(), key=lambda n: n.created_at)

        # Connect sequential memories of the same type
        by_type: Dict[str, List[MemoryNode]] = defaultdict(list)
        for node in sorted_nodes:
            by_type[node.memory_type].append(node)

        for nodes_list in by_type.values():
            for i in range(len(nodes_list) - 1):
                self.edges.append(
                    MemoryEdge(
                        source_id=nodes_list[i].node_id,
                        target_id=nodes_list[i + 1].node_id,
                        relationship="precedes",
                        weight=0.5,
                    )
                )

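A sketch of driving the builder end to end (editor's illustration; the repository path is made up and an agmem repo must already exist there). build_graph() walks the repo's current directory, creates one node per file, then infers reference, similarity, and temporal edges; note that the broad except means any failure surfaces as an empty graph rather than an exception:

builder = SemanticGraphBuilder(Path("~/agent-memory").expanduser())
nodes, edges = builder.build_graph()  # ([], []) if the repo can't be opened
print(f"{len(nodes)} nodes, {len(edges)} edges")
for e in edges[:5]:
    print(f"{e.relationship:>10}  {e.weight:.2f}  {e.source_id} -> {e.target_id}")
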
class SemanticClusterer:
    """Clusters memories based on semantic similarity."""

    def __init__(self, nodes: List[MemoryNode], edges: List[MemoryEdge]):
        self.nodes = {n.node_id: n for n in nodes}
        self.edges = edges

    def cluster_by_tags(self, min_cluster_size: int = 2) -> Dict[str, List[str]]:
        """Cluster nodes by shared tags."""
        # Build tag -> nodes mapping
        tag_to_nodes: Dict[str, Set[str]] = defaultdict(set)
        for node in self.nodes.values():
            for tag in node.tags:
                tag_to_nodes[tag].add(node.node_id)

        # Filter to meaningful clusters
        clusters = {}
        for tag, node_ids in tag_to_nodes.items():
            if len(node_ids) >= min_cluster_size:
                clusters[tag] = list(node_ids)

        return clusters

    def cluster_by_type(self) -> Dict[str, List[str]]:
        """Cluster nodes by memory type."""
        clusters: Dict[str, List[str]] = defaultdict(list)
        for node in self.nodes.values():
            clusters[node.memory_type].append(node.node_id)
        return dict(clusters)

    def find_communities(self, min_connections: int = 2) -> List[Set[str]]:
        """Find communities using simple connected components."""
        # Build adjacency list
        adj: Dict[str, Set[str]] = defaultdict(set)
        for edge in self.edges:
            if edge.weight >= 0.5:  # Only strong connections
                adj[edge.source_id].add(edge.target_id)
                adj[edge.target_id].add(edge.source_id)

        # Find connected components
        visited: Set[str] = set()
        communities: List[Set[str]] = []

        for node_id in self.nodes:
            if node_id not in visited:
                component: Set[str] = set()
                stack = [node_id]
                while stack:
                    current = stack.pop()
                    if current not in visited:
                        visited.add(current)
                        component.add(current)
                        stack.extend(adj[current] - visited)

                if len(component) >= min_connections:
                    communities.append(component)

        return sorted(communities, key=len, reverse=True)

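The clusterer needs no repository, so it can be exercised on hand-built data (editor's sketch; all IDs and paths are made up). With the default min_cluster_size=2 only the shared "python" tag forms a cluster, and find_communities keeps only connected components of at least min_connections nodes:

n1 = MemoryNode("n1", "a.md", "semantic", "A", "n1", "2024-01-01", tags=["python"])
n2 = MemoryNode("n2", "b.md", "semantic", "B", "n2", "2024-01-02", tags=["python"])
n3 = MemoryNode("n3", "c.md", "episodic", "C", "n3", "2024-01-03", tags=["rust"])
clusterer = SemanticClusterer([n1, n2, n3], [MemoryEdge("n1", "n2", "similar", weight=1.0)])
print(clusterer.cluster_by_tags())   # {'python': ['n1', 'n2']} (order may vary)
print(clusterer.cluster_by_type())   # {'semantic': ['n1', 'n2'], 'episodic': ['n3']}
print(clusterer.find_communities())  # [{'n1', 'n2'}]; n3 is a singleton, so it is dropped
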
class GraphSearchEngine:
    """Search using graph traversal."""

    def __init__(self, nodes: Dict[str, MemoryNode], edges: List[MemoryEdge]):
        self.nodes = nodes
        self.edges = edges
        self._build_index()

    def _build_index(self) -> None:
        """Build adjacency index."""
        self.outgoing: Dict[str, List[MemoryEdge]] = defaultdict(list)
        self.incoming: Dict[str, List[MemoryEdge]] = defaultdict(list)

        for edge in self.edges:
            self.outgoing[edge.source_id].append(edge)
            self.incoming[edge.target_id].append(edge)

    def find_related(
        self, node_id: str, max_depth: int = 2, limit: int = 10
    ) -> List[Tuple[MemoryNode, float, int]]:
        """Find related nodes using graph traversal."""
        if node_id not in self.nodes:
            return []

        visited: Dict[str, Tuple[float, int]] = {}  # node_id -> (score, depth)
        queue: List[Tuple[str, float, int]] = [(node_id, 1.0, 0)]

        while queue:
            current_id, score, depth = queue.pop(0)

            if current_id in visited:
                continue
            visited[current_id] = (score, depth)

            if depth >= max_depth:
                continue

            # Traverse outgoing edges
            for edge in self.outgoing.get(current_id, []):
                next_score = score * edge.weight * 0.7  # Decay factor
                if edge.target_id not in visited:
                    queue.append((edge.target_id, next_score, depth + 1))

            # Traverse incoming edges
            for edge in self.incoming.get(current_id, []):
                next_score = score * edge.weight * 0.5  # Lower for backlinks
                if edge.source_id not in visited:
                    queue.append((edge.source_id, next_score, depth + 1))

        # Remove starting node and sort by score
        del visited[node_id]
        results = [
            (self.nodes[nid], score, depth)
            for nid, (score, depth) in visited.items()
            if nid in self.nodes
        ]
        results.sort(key=lambda x: x[1], reverse=True)

        return results[:limit]

    def search_by_tags(self, tags: List[str], limit: int = 10) -> List[MemoryNode]:
        """Search for nodes by tags."""
        tag_set = set(t.lower() for t in tags)
        scored_nodes: List[Tuple[MemoryNode, float]] = []

        for node in self.nodes.values():
            node_tags = set(t.lower() for t in node.tags)
            overlap = tag_set & node_tags
            if overlap:
                score = len(overlap) / len(tag_set)
                scored_nodes.append((node, score))

        scored_nodes.sort(key=lambda x: x[1], reverse=True)
        return [n for n, _ in scored_nodes[:limit]]

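A sketch of the traversal scoring, reusing the toy nodes from the clusterer sketch above (editor's illustration). Scores start at 1.0 and are multiplied by the edge weight times 0.7 per forward hop (0.5 when following an edge backwards), so relevance decays with distance:

engine = GraphSearchEngine(
    {n.node_id: n for n in (n1, n2, n3)},
    [MemoryEdge("n1", "n2", "similar", weight=1.0),
     MemoryEdge("n2", "n3", "precedes", weight=0.5)],
)
for node, score, depth in engine.find_related("n1", max_depth=2):
    print(node.node_id, round(score, 3), depth)
# n2: 1.0 * 1.0 * 0.7 = 0.7 at depth 1
# n3: 0.7 * 0.5 * 0.7 = 0.245 at depth 2
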
# --- Dashboard Helper ---


def get_semantic_graph_dashboard(repo_root: Path) -> Dict[str, Any]:
    """Get data for semantic graph dashboard."""
    builder = SemanticGraphBuilder(repo_root)
    nodes, edges = builder.build_graph()

    clusterer = SemanticClusterer(nodes, edges)
    type_clusters = clusterer.cluster_by_type()
    tag_clusters = clusterer.cluster_by_tags()

    return {
        "node_count": len(nodes),
        "edge_count": len(edges),
        "nodes": [n.to_dict() for n in nodes[:50]],
        "edges": [e.to_dict() for e in edges[:100]],
        "clusters_by_type": {k: len(v) for k, v in type_clusters.items()},
        "clusters_by_tag": {k: len(v) for k, v in list(tag_clusters.items())[:10]},
    }
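Finally, a sketch of the dashboard helper, which presumably feeds views like the new web_ui server added in this release (that wiring is not shown in this diff; the path is made up). The payload caps nodes at 50 and edges at 100 to keep responses small:

payload = get_semantic_graph_dashboard(Path("~/agent-memory").expanduser())
print(payload["node_count"], payload["edge_count"])
print(json.dumps(payload["clusters_by_type"], indent=2))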