agmem 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,388 @@
1
+ """
2
+ Semantic Memory Graph - Knowledge graph with relationships and embeddings.
3
+
4
+ This module provides:
5
+ - Memory node graph building
6
+ - Relationship inference
7
+ - Semantic clustering
8
+ - Graph-based search
9
+ """
10
+
11
import hashlib
import json
import math
from collections import defaultdict, deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
19
+
20
+
21
@dataclass
class MemoryNode:
    """One memory file represented as a vertex of the semantic graph."""

    node_id: str
    path: str
    memory_type: str
    title: str
    content_hash: str
    created_at: str
    tags: List[str] = field(default_factory=list)
    embedding: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable view of the node (embedding excluded)."""
        exported = (
            "node_id",
            "path",
            "memory_type",
            "title",
            "content_hash",
            "created_at",
            "tags",
        )
        return {name: getattr(self, name) for name in exported}
44
+
45
+
46
@dataclass
class MemoryEdge:
    """A directed edge connecting two memory nodes."""

    source_id: str
    target_id: str
    relationship: str  # "references", "similar", "precedes", "related"
    weight: float = 1.0
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable view of the edge (metadata excluded)."""
        return dict(
            source=self.source_id,
            target=self.target_id,
            relationship=self.relationship,
            weight=self.weight,
        )
63
+
64
+
65
class SemanticGraphBuilder:
    """Builds a semantic graph from the memory files of a repository.

    Nodes are created from every regular file under the repository's
    current directory; edges are then inferred from markdown links
    (``references``), shared tags (``similar``), and creation order
    (``precedes``).
    """

    def __init__(self, repo_root: Path):
        self.repo_root = Path(repo_root)
        self.nodes: Dict[str, MemoryNode] = {}
        self.edges: List[MemoryEdge] = []

    def build_graph(self) -> Tuple[List[MemoryNode], List[MemoryEdge]]:
        """Build the complete semantic graph.

        Returns:
            A ``(nodes, edges)`` tuple. This is deliberately best-effort:
            any failure while opening or walking the repository yields
            ``([], [])`` instead of raising, so dashboard callers never
            crash on a missing or corrupt repository.
        """
        from memvcs.core.repository import Repository

        try:
            repo = Repository(self.repo_root)
            current_dir = repo.current_dir

            # Nodes: one per regular file under the current directory.
            for filepath in current_dir.rglob("*"):
                if filepath.is_file():
                    node = self._create_node(filepath, current_dir)
                    if node:
                        self.nodes[node.node_id] = node

            # Edges, inferred from three independent signals.
            self._infer_reference_edges()
            self._infer_similarity_edges()
            self._infer_temporal_edges()

            return list(self.nodes.values()), self.edges
        except Exception:
            # Best-effort: an unreadable repository produces an empty graph.
            return [], []

    def _create_node(self, filepath: Path, base_dir: Path) -> Optional[MemoryNode]:
        """Create a node from a file, or ``None`` if the file can't be read."""
        try:
            rel_path = str(filepath.relative_to(base_dir))
            content = filepath.read_text(encoding="utf-8", errors="replace")

            # Memory type is taken from the directory the file lives in.
            memory_type = "unknown"
            for mt in ["episodic", "semantic", "procedural"]:
                if mt in filepath.parts:
                    memory_type = mt
                    break

            # Title: first markdown H1 within the first five lines, else the stem.
            title = filepath.stem
            for line in content.split("\n")[:5]:
                if line.startswith("# "):
                    title = line[2:].strip()
                    break

            tags = self._extract_tags(content)

            content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
            # NOTE(review): this is the file's mtime, not a true creation
            # time — "created_at" is an approximation.
            mtime = datetime.fromtimestamp(filepath.stat().st_mtime, tz=timezone.utc).isoformat()

            # BUGFIX: derive the node id from path *and* content. Using the
            # bare content hash as the id made two files with identical
            # content collide in self.nodes, silently dropping one of them
            # from the graph.
            node_id = hashlib.sha256(f"{rel_path}\x00{content_hash}".encode()).hexdigest()[:16]

            return MemoryNode(
                node_id=node_id,
                path=rel_path,
                memory_type=memory_type,
                title=title,
                content_hash=content_hash,
                created_at=mtime,
                tags=tags,
            )
        except Exception:
            # Unreadable or vanished files are skipped, not fatal.
            return None

    def _extract_tags(self, content: str) -> List[str]:
        """Collect tags from YAML frontmatter and inline hashtags (max 20)."""
        import re

        tags: Set[str] = set()

        # YAML frontmatter form: tags: [a, b, c] within the first 1000 chars.
        if content.startswith("---"):
            match = re.search(r"tags:\s*\[([^\]]+)\]", content[:1000])
            if match:
                tags.update(t.strip().strip("'\"") for t in match.group(1).split(","))

        # Inline hashtags; only the first 10 are kept to bound noise.
        hashtags = re.findall(r"#(\w+)", content)
        tags.update(hashtags[:10])

        return list(tags)[:20]

    def _infer_reference_edges(self) -> None:
        """Add a ``references`` edge for each internal markdown link."""
        import re

        for node in self.nodes.values():
            try:
                # Prefer the checked-out copy; fall back to the repo root.
                filepath = self.repo_root / ".mem" / "current" / node.path
                if not filepath.exists():
                    filepath = self.repo_root / node.path
                if not filepath.exists():
                    continue

                content = filepath.read_text(encoding="utf-8", errors="replace")

                # Markdown links: [text](target)
                links = re.findall(r"\[([^\]]+)\]\(([^)]+)\)", content)
                for _, target in links:
                    if target.startswith("http"):
                        continue  # external link — not a graph edge
                    # BUGFIX: remove only leading "./" / "../" path segments.
                    # The previous ``target.lstrip("./")`` stripped a
                    # *character set*, so a link to ".hidden.md" became
                    # "hidden.md" and matched the wrong node (or none).
                    target_path = re.sub(r"^(?:\.\.?/)+", "", target)
                    for other_node in self.nodes.values():
                        if (
                            other_node.path.endswith(target_path)
                            or target_path in other_node.path
                        ):
                            self.edges.append(
                                MemoryEdge(
                                    source_id=node.node_id,
                                    target_id=other_node.node_id,
                                    relationship="references",
                                    weight=1.0,
                                )
                            )
                            break  # link each target at most once
            except Exception:
                # Best-effort: skip files that cannot be re-read.
                pass

    def _infer_similarity_edges(self) -> None:
        """Add a ``similar`` edge between node pairs with strong tag overlap."""
        nodes_list = list(self.nodes.values())

        for i, node1 in enumerate(nodes_list):
            for node2 in nodes_list[i + 1 :]:
                if not (node1.tags and node2.tags):
                    continue
                common_tags = set(node1.tags) & set(node2.tags)
                if not common_tags:
                    continue
                # Overlap normalised by the larger tag set.
                weight = len(common_tags) / max(len(node1.tags), len(node2.tags))
                if weight >= 0.3:  # ignore weak overlaps
                    self.edges.append(
                        MemoryEdge(
                            source_id=node1.node_id,
                            target_id=node2.node_id,
                            relationship="similar",
                            weight=weight,
                            metadata={"common_tags": list(common_tags)},
                        )
                    )

    def _infer_temporal_edges(self) -> None:
        """Chain same-type memories in creation order with ``precedes`` edges."""
        sorted_nodes = sorted(self.nodes.values(), key=lambda n: n.created_at)

        by_type: Dict[str, List[MemoryNode]] = defaultdict(list)
        for node in sorted_nodes:
            by_type[node.memory_type].append(node)

        for nodes_list in by_type.values():
            for earlier, later in zip(nodes_list, nodes_list[1:]):
                self.edges.append(
                    MemoryEdge(
                        source_id=earlier.node_id,
                        target_id=later.node_id,
                        relationship="precedes",
                        weight=0.5,
                    )
                )
231
+
232
+
233
class SemanticClusterer:
    """Groups memory nodes into clusters via tags, types, and edges."""

    def __init__(self, nodes: List[MemoryNode], edges: List[MemoryEdge]):
        self.nodes = {n.node_id: n for n in nodes}
        self.edges = edges

    def cluster_by_tags(self, min_cluster_size: int = 2) -> Dict[str, List[str]]:
        """Return ``tag -> node ids`` for tags carried by enough nodes."""
        members: Dict[str, Set[str]] = defaultdict(set)
        for node in self.nodes.values():
            for tag in node.tags:
                members[tag].add(node.node_id)

        # Keep only tags shared widely enough to form a meaningful cluster.
        return {
            tag: list(ids)
            for tag, ids in members.items()
            if len(ids) >= min_cluster_size
        }

    def cluster_by_type(self) -> Dict[str, List[str]]:
        """Return ``memory type -> node ids``."""
        grouped: Dict[str, List[str]] = defaultdict(list)
        for node in self.nodes.values():
            grouped[node.memory_type].append(node.node_id)
        return dict(grouped)

    def find_communities(self, min_connections: int = 2) -> List[Set[str]]:
        """Return connected components, largest first.

        Only edges with weight >= 0.5 count as connections; components with
        fewer than ``min_connections`` members are dropped.
        """
        neighbours: Dict[str, Set[str]] = defaultdict(set)
        for edge in self.edges:
            if edge.weight >= 0.5:  # strong connections only
                neighbours[edge.source_id].add(edge.target_id)
                neighbours[edge.target_id].add(edge.source_id)

        seen: Set[str] = set()
        communities: List[Set[str]] = []

        # Iterative depth-first flood fill over the undirected adjacency.
        for start in self.nodes:
            if start in seen:
                continue
            group: Set[str] = set()
            pending = [start]
            while pending:
                nid = pending.pop()
                if nid in seen:
                    continue
                seen.add(nid)
                group.add(nid)
                pending.extend(neighbours[nid] - seen)
            if len(group) >= min_connections:
                communities.append(group)

        return sorted(communities, key=len, reverse=True)
291
+
292
+
293
class GraphSearchEngine:
    """Search memories by graph traversal and by tag overlap."""

    def __init__(self, nodes: Dict[str, MemoryNode], edges: List[MemoryEdge]):
        self.nodes = nodes
        self.edges = edges
        self._build_index()

    def _build_index(self) -> None:
        """Index edges by source (outgoing) and by target (incoming)."""
        self.outgoing: Dict[str, List[MemoryEdge]] = defaultdict(list)
        self.incoming: Dict[str, List[MemoryEdge]] = defaultdict(list)

        for edge in self.edges:
            self.outgoing[edge.source_id].append(edge)
            self.incoming[edge.target_id].append(edge)

    def find_related(
        self, node_id: str, max_depth: int = 2, limit: int = 10
    ) -> List[Tuple[MemoryNode, float, int]]:
        """Find nodes related to ``node_id`` by breadth-first traversal.

        Returns up to ``limit`` ``(node, score, depth)`` triples sorted by
        score, descending. Scores decay per hop: forward links by 0.7,
        backlinks by 0.5, each scaled by edge weight. The start node is
        excluded from the results.
        """
        if node_id not in self.nodes:
            return []

        visited: Dict[str, Tuple[float, int]] = {}  # node_id -> (score, depth)
        # PERF: deque.popleft() is O(1); the previous list.pop(0) was O(n)
        # per dequeue, making the whole traversal quadratic on dense graphs.
        queue: deque = deque([(node_id, 1.0, 0)])

        while queue:
            current_id, score, depth = queue.popleft()

            if current_id in visited:
                continue
            visited[current_id] = (score, depth)

            if depth >= max_depth:
                continue

            # Forward links decay by 0.7 per hop.
            for edge in self.outgoing.get(current_id, []):
                if edge.target_id not in visited:
                    queue.append((edge.target_id, score * edge.weight * 0.7, depth + 1))

            # Backlinks decay more strongly (0.5 per hop).
            for edge in self.incoming.get(current_id, []):
                if edge.source_id not in visited:
                    queue.append((edge.source_id, score * edge.weight * 0.5, depth + 1))

        del visited[node_id]  # the start node is not its own result
        results = [
            (self.nodes[nid], score, depth)
            for nid, (score, depth) in visited.items()
            if nid in self.nodes
        ]
        results.sort(key=lambda item: item[1], reverse=True)

        return results[:limit]

    def search_by_tags(self, tags: List[str], limit: int = 10) -> List[MemoryNode]:
        """Rank nodes by the fraction of query tags they carry (case-insensitive)."""
        tag_set = {t.lower() for t in tags}
        scored: List[Tuple[MemoryNode, float]] = []

        for node in self.nodes.values():
            overlap = tag_set & {t.lower() for t in node.tags}
            if overlap:
                scored.append((node, len(overlap) / len(tag_set)))

        scored.sort(key=lambda item: item[1], reverse=True)
        return [node for node, _ in scored[:limit]]
367
+
368
+
369
+ # --- Dashboard Helper ---
370
+
371
+
372
def get_semantic_graph_dashboard(repo_root: Path) -> Dict[str, Any]:
    """Assemble the payload for the semantic-graph dashboard view."""
    graph_builder = SemanticGraphBuilder(repo_root)
    nodes, edges = graph_builder.build_graph()

    clusterer = SemanticClusterer(nodes, edges)
    by_type = clusterer.cluster_by_type()
    by_tag = clusterer.cluster_by_tags()

    # Cap the serialized payload: at most 50 nodes, 100 edges, 10 tag clusters.
    payload: Dict[str, Any] = {
        "node_count": len(nodes),
        "edge_count": len(edges),
        "nodes": [node.to_dict() for node in nodes[:50]],
        "edges": [edge.to_dict() for edge in edges[:100]],
        "clusters_by_type": {name: len(ids) for name, ids in by_type.items()},
        "clusters_by_tag": {name: len(ids) for name, ids in list(by_tag.items())[:10]},
    }
    return payload