agmem 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agmem-0.1.1.dist-info/METADATA +656 -0
- agmem-0.1.1.dist-info/RECORD +67 -0
- agmem-0.1.1.dist-info/WHEEL +5 -0
- agmem-0.1.1.dist-info/entry_points.txt +2 -0
- agmem-0.1.1.dist-info/licenses/LICENSE +21 -0
- agmem-0.1.1.dist-info/top_level.txt +1 -0
- memvcs/__init__.py +9 -0
- memvcs/cli.py +178 -0
- memvcs/commands/__init__.py +23 -0
- memvcs/commands/add.py +258 -0
- memvcs/commands/base.py +23 -0
- memvcs/commands/blame.py +169 -0
- memvcs/commands/branch.py +110 -0
- memvcs/commands/checkout.py +101 -0
- memvcs/commands/clean.py +76 -0
- memvcs/commands/clone.py +91 -0
- memvcs/commands/commit.py +174 -0
- memvcs/commands/daemon.py +267 -0
- memvcs/commands/diff.py +157 -0
- memvcs/commands/fsck.py +203 -0
- memvcs/commands/garden.py +107 -0
- memvcs/commands/graph.py +151 -0
- memvcs/commands/init.py +61 -0
- memvcs/commands/log.py +103 -0
- memvcs/commands/mcp.py +59 -0
- memvcs/commands/merge.py +88 -0
- memvcs/commands/pull.py +65 -0
- memvcs/commands/push.py +143 -0
- memvcs/commands/reflog.py +52 -0
- memvcs/commands/remote.py +51 -0
- memvcs/commands/reset.py +98 -0
- memvcs/commands/search.py +163 -0
- memvcs/commands/serve.py +54 -0
- memvcs/commands/show.py +125 -0
- memvcs/commands/stash.py +97 -0
- memvcs/commands/status.py +112 -0
- memvcs/commands/tag.py +117 -0
- memvcs/commands/test.py +132 -0
- memvcs/commands/tree.py +156 -0
- memvcs/core/__init__.py +21 -0
- memvcs/core/config_loader.py +245 -0
- memvcs/core/constants.py +12 -0
- memvcs/core/diff.py +380 -0
- memvcs/core/gardener.py +466 -0
- memvcs/core/hooks.py +151 -0
- memvcs/core/knowledge_graph.py +381 -0
- memvcs/core/merge.py +474 -0
- memvcs/core/objects.py +323 -0
- memvcs/core/pii_scanner.py +343 -0
- memvcs/core/refs.py +447 -0
- memvcs/core/remote.py +278 -0
- memvcs/core/repository.py +522 -0
- memvcs/core/schema.py +414 -0
- memvcs/core/staging.py +227 -0
- memvcs/core/storage/__init__.py +72 -0
- memvcs/core/storage/base.py +359 -0
- memvcs/core/storage/gcs.py +308 -0
- memvcs/core/storage/local.py +182 -0
- memvcs/core/storage/s3.py +369 -0
- memvcs/core/test_runner.py +371 -0
- memvcs/core/vector_store.py +313 -0
- memvcs/integrations/__init__.py +5 -0
- memvcs/integrations/mcp_server.py +267 -0
- memvcs/integrations/web_ui/__init__.py +1 -0
- memvcs/integrations/web_ui/server.py +352 -0
- memvcs/utils/__init__.py +9 -0
- memvcs/utils/helpers.py +178 -0
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge graph builder for agmem.
|
|
3
|
+
|
|
4
|
+
Visualizes connections between memory files to spot contradictions or knowledge islands.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Dict, Any, Optional, Tuple, Set
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import networkx as nx
|
|
16
|
+
NETWORKX_AVAILABLE = True
|
|
17
|
+
except ImportError:
|
|
18
|
+
NETWORKX_AVAILABLE = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class GraphNode:
    """A node in the knowledge graph; each node stands for one memory file."""
    id: str  # File path
    label: str  # Display name
    memory_type: str  # episodic, semantic, procedural
    size: int  # Content size
    tags: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; ``memory_type`` is exported under the key ``type``."""
        serialized: Dict[str, Any] = {'id': self.id, 'label': self.label}
        serialized['type'] = self.memory_type
        serialized['size'] = self.size
        serialized['tags'] = self.tags
        return serialized
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class GraphEdge:
    """A directed connection between two memory files in the knowledge graph."""
    source: str
    target: str
    edge_type: str  # "reference", "similarity", "same_topic"
    weight: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict; ``edge_type`` is exported under the key ``type``."""
        return dict(
            source=self.source,
            target=self.target,
            type=self.edge_type,
            weight=self.weight,
        )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class KnowledgeGraphData:
    """Complete graph (nodes + edges + summary metadata) ready for export."""
    nodes: List[GraphNode]
    edges: List[GraphEdge]
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the whole graph to plain dicts via each element's ``to_dict``."""
        node_dicts = [node.to_dict() for node in self.nodes]
        edge_dicts = [edge.to_dict() for edge in self.edges]
        return {'nodes': node_dicts, 'edges': edge_dicts, 'metadata': self.metadata}

    def to_json(self, indent: int = 2) -> str:
        """Render :meth:`to_dict` as a JSON string."""
        payload = self.to_dict()
        return json.dumps(payload, indent=indent)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class KnowledgeGraphBuilder:
    """
    Builds a knowledge graph from memory files.

    Detects connections through:
    1. Wikilinks: [[filename]] references
    2. Semantic similarity: Using embeddings
    3. Shared tags: Files with common tags
    """

    # Pattern for wikilinks: [[target]] or [[target|display text]].
    # Only the target is captured; the optional "|display" suffix is matched
    # and discarded.
    WIKILINK_PATTERN = re.compile(r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]')

    def __init__(self, repo, vector_store=None):
        """
        Initialize the graph builder.

        Args:
            repo: Repository instance (must expose a ``root`` Path)
            vector_store: Optional VectorStore for semantic similarity
        """
        self.repo = repo
        self.vector_store = vector_store
        # Memory files are read from <repo root>/current.
        self.current_dir = repo.root / 'current'

        # Mirror of nodes/edges kept in a networkx DiGraph for the analysis
        # helpers below; stays None when networkx is not installed.
        self._graph = None
        if NETWORKX_AVAILABLE:
            self._graph = nx.DiGraph()

    def _detect_memory_type(self, filepath: str) -> str:
        """Detect memory type from file path (case-insensitive substring match)."""
        path_lower = filepath.lower()
        if 'episodic' in path_lower:
            return 'episodic'
        elif 'semantic' in path_lower:
            return 'semantic'
        elif 'procedural' in path_lower:
            return 'procedural'
        elif 'checkpoint' in path_lower:
            return 'checkpoints'
        elif 'session-summar' in path_lower:
            # Prefix covers both "session-summary" and "session-summaries".
            return 'session-summaries'
        return 'unknown'

    def _extract_wikilinks(self, content: str) -> Set[str]:
        """Extract wikilink targets from content (deduplicated via set)."""
        matches = self.WIKILINK_PATTERN.findall(content)
        return set(matches)

    def _extract_tags_from_frontmatter(self, content: str) -> List[str]:
        """Extract tags from YAML frontmatter; returns [] on any failure."""
        try:
            # yaml is imported but not used directly -- presumably to fail fast
            # here when PyYAML is missing, since FrontmatterParser likely needs
            # it; TODO confirm.
            import yaml
            from .schema import FrontmatterParser

            fm, _ = FrontmatterParser.parse(content)
            if fm and fm.tags:
                return fm.tags
        except Exception:
            # Best-effort: missing dependency or malformed frontmatter simply
            # yields no tags rather than aborting the graph build.
            pass
        return []

    def _normalize_link_target(self, target: str, source_path: str) -> Optional[str]:
        """
        Normalize a wikilink target to a file path.

        Resolution order: root of ``current/``, then the source file's own
        directory, then the well-known memory subdirectories. Each location
        is probed with ``.md``, ``.txt``, and no extension, in that order.

        Args:
            target: Wikilink target (e.g., "user-preferences")
            source_path: Path of the source file

        Returns:
            Normalized file path (relative to ``current/``) or None if not found
        """
        # Try exact match
        for ext in ['.md', '.txt', '']:
            check_path = self.current_dir / (target + ext)
            if check_path.exists():
                return str(check_path.relative_to(self.current_dir))

        # Try in same directory as source
        source_dir = Path(source_path).parent
        for ext in ['.md', '.txt', '']:
            check_path = self.current_dir / source_dir / (target + ext)
            if check_path.exists():
                return str(check_path.relative_to(self.current_dir))

        # Try in common directories
        for subdir in ['semantic', 'episodic', 'procedural']:
            for ext in ['.md', '.txt', '']:
                check_path = self.current_dir / subdir / (target + ext)
                if check_path.exists():
                    return str(check_path.relative_to(self.current_dir))

        return None

    def build_graph(self, include_similarity: bool = True, similarity_threshold: float = 0.7) -> KnowledgeGraphData:
        """
        Build the knowledge graph from memory files.

        Only ``*.md`` files under ``current/`` become nodes; wikilinks that
        resolve to non-markdown files are therefore dropped by the
        ``target_path in file_contents`` check below.

        Args:
            include_similarity: Include similarity-based edges
            similarity_threshold: Minimum similarity for edges (0-1)

        Returns:
            KnowledgeGraphData with nodes and edges
        """
        nodes = []
        edges = []
        file_paths = []
        file_contents = {}  # rel_path -> raw file text (reused for link/similarity passes)
        file_tags = defaultdict(list)  # tag -> list of rel_paths carrying it

        # Collect all memory files
        if not self.current_dir.exists():
            # No metadata on the empty graph -- callers get bare empty lists.
            return KnowledgeGraphData(nodes=[], edges=[])

        for memory_file in self.current_dir.glob('**/*.md'):
            try:
                rel_path = str(memory_file.relative_to(self.current_dir))
                # NOTE(review): platform-default encoding; consider
                # read_text(encoding='utf-8') -- a decode error here silently
                # skips the file via the except below.
                content = memory_file.read_text()

                # Create node
                memory_type = self._detect_memory_type(rel_path)
                tags = self._extract_tags_from_frontmatter(content)

                node = GraphNode(
                    id=rel_path,
                    label=memory_file.stem,
                    memory_type=memory_type,
                    size=len(content),
                    tags=tags
                )
                nodes.append(node)
                file_paths.append(rel_path)
                file_contents[rel_path] = content

                # Index tags
                for tag in tags:
                    file_tags[tag].append(rel_path)

                # Add to NetworkX graph if available
                if self._graph is not None:
                    self._graph.add_node(rel_path, **node.to_dict())

            except Exception:
                # Unreadable file: skip it entirely rather than failing the build.
                continue

        # Add wikilink edges
        for source_path, content in file_contents.items():
            links = self._extract_wikilinks(content)
            for target in links:
                target_path = self._normalize_link_target(target, source_path)
                # Only keep links whose target is itself a collected .md node.
                if target_path and target_path in file_contents:
                    edge = GraphEdge(
                        source=source_path,
                        target=target_path,
                        edge_type='reference',
                        weight=1.0
                    )
                    edges.append(edge)

                    if self._graph is not None:
                        self._graph.add_edge(source_path, target_path, type='reference', weight=1.0)

        # Add tag-based edges (O(k^2) pairs per tag; fine for small repos)
        for tag, files in file_tags.items():
            if len(files) > 1:
                for i, file1 in enumerate(files):
                    for file2 in files[i+1:]:
                        edge = GraphEdge(
                            source=file1,
                            target=file2,
                            edge_type='same_topic',
                            weight=0.5
                        )
                        edges.append(edge)

                        if self._graph is not None:
                            # NOTE(review): DiGraph keeps one attribute dict per
                            # (u, v) pair, so this overwrites the type/weight of
                            # any existing 'reference' edge between the same files.
                            self._graph.add_edge(file1, file2, type='same_topic', weight=0.5)

        # Add similarity edges
        if include_similarity and self.vector_store and len(file_paths) > 1:
            try:
                edges.extend(self._build_similarity_edges(file_paths, file_contents, similarity_threshold))
            except Exception:
                pass  # Skip similarity if vector store fails

        # Build metadata
        metadata = {
            'total_nodes': len(nodes),
            'total_edges': len(edges),
            'memory_types': {
                'episodic': sum(1 for n in nodes if n.memory_type == 'episodic'),
                'semantic': sum(1 for n in nodes if n.memory_type == 'semantic'),
                'procedural': sum(1 for n in nodes if n.memory_type == 'procedural'),
                'other': sum(1 for n in nodes if n.memory_type not in ['episodic', 'semantic', 'procedural']),
            },
            'edge_types': {
                'reference': sum(1 for e in edges if e.edge_type == 'reference'),
                'similarity': sum(1 for e in edges if e.edge_type == 'similarity'),
                'same_topic': sum(1 for e in edges if e.edge_type == 'same_topic'),
            }
        }

        return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)

    def _build_similarity_edges(self, file_paths: List[str], file_contents: Dict[str, str], threshold: float) -> List[GraphEdge]:
        """Build edges between file pairs whose embedding cosine similarity >= threshold."""
        edges = []

        # Get embeddings for all files
        embeddings = {}
        for path, content in file_contents.items():
            try:
                # Use first 2000 chars for efficiency
                truncated = content[:2000]
                # NOTE(review): relies on the vector store's private _embed();
                # verify a public embedding API isn't available instead.
                emb = self.vector_store._embed(truncated)
                embeddings[path] = emb
            except Exception:
                # Files that fail to embed are just left out of the pass.
                continue

        # Compute pairwise similarities
        import math

        def cosine_similarity(a: List[float], b: List[float]) -> float:
            # Plain-Python cosine; returns 0 for a zero-norm vector.
            dot = sum(x*y for x, y in zip(a, b))
            norm_a = math.sqrt(sum(x*x for x in a))
            norm_b = math.sqrt(sum(x*x for x in b))
            if norm_a == 0 or norm_b == 0:
                return 0
            return dot / (norm_a * norm_b)

        # O(n^2) pairwise comparison over successfully embedded files.
        paths_list = list(embeddings.keys())
        for i, path1 in enumerate(paths_list):
            for path2 in paths_list[i+1:]:
                sim = cosine_similarity(embeddings[path1], embeddings[path2])
                if sim >= threshold:
                    edge = GraphEdge(
                        source=path1,
                        target=path2,
                        edge_type='similarity',
                        weight=sim
                    )
                    edges.append(edge)

                    if self._graph is not None:
                        # NOTE(review): overwrites attributes of any earlier
                        # reference/same_topic edge for this pair (DiGraph keeps
                        # a single attribute dict per edge).
                        self._graph.add_edge(path1, path2, type='similarity', weight=sim)

        return edges

    def find_isolated_nodes(self) -> List[str]:
        """Find nodes with no connections (knowledge islands).

        Requires networkx; returns [] when it is unavailable or the graph is empty.
        """
        if self._graph is None or len(self._graph) == 0:
            return []

        # Convert to undirected for analysis
        undirected = self._graph.to_undirected()
        return [node for node in undirected.nodes() if undirected.degree(node) == 0]

    def find_potential_contradictions(self) -> List[Tuple[str, str, float]]:
        """
        Find files that might have contradictory information.

        Returns files in the same topic cluster with low similarity.
        """
        if self._graph is None:
            return []

        contradictions = []

        # Files connected by same_topic but with low similarity
        for u, v, data in self._graph.edges(data=True):
            if data.get('type') == 'same_topic':
                # Check if there's also a similarity edge
                # NOTE(review): on a DiGraph, get_edge_data(u, v) returns the
                # SAME attribute dict as `data` (one dict per edge), so
                # data['type'] == 'same_topic' and sim_edge['type'] == 'similarity'
                # can never both hold -- as written this method always returns [].
                # A MultiDiGraph, or tracking similarity weights separately,
                # would be needed for the intended check.
                sim_edge = self._graph.get_edge_data(u, v)
                if sim_edge and sim_edge.get('type') == 'similarity':
                    if sim_edge.get('weight', 1.0) < 0.3:
                        contradictions.append((u, v, sim_edge.get('weight', 0)))

        return contradictions

    def export_for_d3(self) -> str:
        """Export graph in D3.js force-graph format.

        Note: rebuilds the graph with build_graph() defaults (similarity edges
        included, threshold 0.7) rather than reusing a previous build.
        """
        graph_data = self.build_graph()

        d3_format = {
            'nodes': [
                {
                    'id': n.id,
                    'name': n.label,
                    'group': n.memory_type,
                    # Node radius derived from content size, clamped to [5, 20].
                    'size': min(20, max(5, n.size // 100))
                }
                for n in graph_data.nodes
            ],
            'links': [
                {
                    'source': e.source,
                    'target': e.target,
                    'type': e.edge_type,
                    'value': e.weight
                }
                for e in graph_data.edges
            ]
        }

        return json.dumps(d3_format, indent=2)
|