agmem 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/METADATA +157 -16
- agmem-0.1.3.dist-info/RECORD +105 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +45 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +83 -76
- memvcs/commands/audit.py +59 -0
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +11 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +109 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +90 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +86 -61
- memvcs/commands/garden.py +40 -35
- memvcs/commands/gc.py +51 -0
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +69 -27
- memvcs/commands/pack.py +129 -0
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +31 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +110 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +176 -145
- memvcs/core/hooks.py +48 -14
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +135 -138
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +260 -170
- memvcs/core/objects.py +110 -101
- memvcs/core/pack.py +92 -0
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/refs.py +132 -115
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +254 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +121 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +56 -36
- memvcs/core/zk_proofs.py +26 -0
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/knowledge_graph.py
CHANGED
|
@@ -13,6 +13,7 @@ from collections import defaultdict
|
|
|
13
13
|
|
|
14
14
|
try:
|
|
15
15
|
import networkx as nx
|
|
16
|
+
|
|
16
17
|
NETWORKX_AVAILABLE = True
|
|
17
18
|
except ImportError:
|
|
18
19
|
NETWORKX_AVAILABLE = False
|
|
@@ -21,53 +22,56 @@ except ImportError:
|
|
|
21
22
|
@dataclass
|
|
22
23
|
class GraphNode:
|
|
23
24
|
"""A node in the knowledge graph (represents a memory file)."""
|
|
25
|
+
|
|
24
26
|
id: str # File path
|
|
25
27
|
label: str # Display name
|
|
26
28
|
memory_type: str # episodic, semantic, procedural
|
|
27
29
|
size: int # Content size
|
|
28
30
|
tags: List[str] = field(default_factory=list)
|
|
29
|
-
|
|
31
|
+
|
|
30
32
|
def to_dict(self) -> Dict[str, Any]:
|
|
31
33
|
return {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
"id": self.id,
|
|
35
|
+
"label": self.label,
|
|
36
|
+
"type": self.memory_type,
|
|
37
|
+
"size": self.size,
|
|
38
|
+
"tags": self.tags,
|
|
37
39
|
}
|
|
38
40
|
|
|
39
41
|
|
|
40
42
|
@dataclass
|
|
41
43
|
class GraphEdge:
|
|
42
44
|
"""An edge in the knowledge graph (represents a connection)."""
|
|
45
|
+
|
|
43
46
|
source: str
|
|
44
47
|
target: str
|
|
45
48
|
edge_type: str # "reference", "similarity", "same_topic"
|
|
46
49
|
weight: float = 1.0
|
|
47
|
-
|
|
50
|
+
|
|
48
51
|
def to_dict(self) -> Dict[str, Any]:
|
|
49
52
|
return {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
"source": self.source,
|
|
54
|
+
"target": self.target,
|
|
55
|
+
"type": self.edge_type,
|
|
56
|
+
"weight": self.weight,
|
|
54
57
|
}
|
|
55
58
|
|
|
56
59
|
|
|
57
60
|
@dataclass
|
|
58
61
|
class KnowledgeGraphData:
|
|
59
62
|
"""Complete graph data for export."""
|
|
63
|
+
|
|
60
64
|
nodes: List[GraphNode]
|
|
61
65
|
edges: List[GraphEdge]
|
|
62
66
|
metadata: Dict[str, Any] = field(default_factory=dict)
|
|
63
|
-
|
|
67
|
+
|
|
64
68
|
def to_dict(self) -> Dict[str, Any]:
|
|
65
69
|
return {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
70
|
+
"nodes": [n.to_dict() for n in self.nodes],
|
|
71
|
+
"edges": [e.to_dict() for e in self.edges],
|
|
72
|
+
"metadata": self.metadata,
|
|
69
73
|
}
|
|
70
|
-
|
|
74
|
+
|
|
71
75
|
def to_json(self, indent: int = 2) -> str:
|
|
72
76
|
return json.dumps(self.to_dict(), indent=indent)
|
|
73
77
|
|
|
@@ -75,106 +79,109 @@ class KnowledgeGraphData:
|
|
|
75
79
|
class KnowledgeGraphBuilder:
|
|
76
80
|
"""
|
|
77
81
|
Builds a knowledge graph from memory files.
|
|
78
|
-
|
|
82
|
+
|
|
79
83
|
Detects connections through:
|
|
80
84
|
1. Wikilinks: [[filename]] references
|
|
81
85
|
2. Semantic similarity: Using embeddings
|
|
82
86
|
3. Shared tags: Files with common tags
|
|
87
|
+
4. Co-occurrence: Facts in same episodic session (optional)
|
|
83
88
|
"""
|
|
84
|
-
|
|
89
|
+
|
|
85
90
|
# Pattern for wikilinks: [[target]] or [[target|display text]]
|
|
86
|
-
WIKILINK_PATTERN = re.compile(r
|
|
87
|
-
|
|
91
|
+
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")
|
|
92
|
+
|
|
88
93
|
def __init__(self, repo, vector_store=None):
|
|
89
94
|
"""
|
|
90
95
|
Initialize the graph builder.
|
|
91
|
-
|
|
96
|
+
|
|
92
97
|
Args:
|
|
93
98
|
repo: Repository instance
|
|
94
99
|
vector_store: Optional VectorStore for semantic similarity
|
|
95
100
|
"""
|
|
96
101
|
self.repo = repo
|
|
97
102
|
self.vector_store = vector_store
|
|
98
|
-
self.current_dir = repo.root /
|
|
99
|
-
|
|
103
|
+
self.current_dir = repo.root / "current"
|
|
104
|
+
|
|
100
105
|
self._graph = None
|
|
101
106
|
if NETWORKX_AVAILABLE:
|
|
102
107
|
self._graph = nx.DiGraph()
|
|
103
|
-
|
|
108
|
+
|
|
104
109
|
def _detect_memory_type(self, filepath: str) -> str:
|
|
105
110
|
"""Detect memory type from file path."""
|
|
106
111
|
path_lower = filepath.lower()
|
|
107
|
-
if
|
|
108
|
-
return
|
|
109
|
-
elif
|
|
110
|
-
return
|
|
111
|
-
elif
|
|
112
|
-
return
|
|
113
|
-
elif
|
|
114
|
-
return
|
|
115
|
-
elif
|
|
116
|
-
return
|
|
117
|
-
return
|
|
118
|
-
|
|
112
|
+
if "episodic" in path_lower:
|
|
113
|
+
return "episodic"
|
|
114
|
+
elif "semantic" in path_lower:
|
|
115
|
+
return "semantic"
|
|
116
|
+
elif "procedural" in path_lower:
|
|
117
|
+
return "procedural"
|
|
118
|
+
elif "checkpoint" in path_lower:
|
|
119
|
+
return "checkpoints"
|
|
120
|
+
elif "session-summar" in path_lower:
|
|
121
|
+
return "session-summaries"
|
|
122
|
+
return "unknown"
|
|
123
|
+
|
|
119
124
|
def _extract_wikilinks(self, content: str) -> Set[str]:
|
|
120
125
|
"""Extract wikilink targets from content."""
|
|
121
126
|
matches = self.WIKILINK_PATTERN.findall(content)
|
|
122
127
|
return set(matches)
|
|
123
|
-
|
|
128
|
+
|
|
124
129
|
def _extract_tags_from_frontmatter(self, content: str) -> List[str]:
|
|
125
130
|
"""Extract tags from YAML frontmatter."""
|
|
126
131
|
try:
|
|
127
132
|
import yaml
|
|
128
133
|
from .schema import FrontmatterParser
|
|
129
|
-
|
|
134
|
+
|
|
130
135
|
fm, _ = FrontmatterParser.parse(content)
|
|
131
136
|
if fm and fm.tags:
|
|
132
137
|
return fm.tags
|
|
133
138
|
except Exception:
|
|
134
139
|
pass
|
|
135
140
|
return []
|
|
136
|
-
|
|
141
|
+
|
|
137
142
|
def _normalize_link_target(self, target: str, source_path: str) -> Optional[str]:
|
|
138
143
|
"""
|
|
139
144
|
Normalize a wikilink target to a file path.
|
|
140
|
-
|
|
145
|
+
|
|
141
146
|
Args:
|
|
142
147
|
target: Wikilink target (e.g., "user-preferences")
|
|
143
148
|
source_path: Path of the source file
|
|
144
|
-
|
|
149
|
+
|
|
145
150
|
Returns:
|
|
146
151
|
Normalized file path or None if not found
|
|
147
152
|
"""
|
|
148
153
|
# Try exact match
|
|
149
|
-
for ext in [
|
|
154
|
+
for ext in [".md", ".txt", ""]:
|
|
150
155
|
check_path = self.current_dir / (target + ext)
|
|
151
156
|
if check_path.exists():
|
|
152
157
|
return str(check_path.relative_to(self.current_dir))
|
|
153
|
-
|
|
158
|
+
|
|
154
159
|
# Try in same directory as source
|
|
155
160
|
source_dir = Path(source_path).parent
|
|
156
|
-
for ext in [
|
|
161
|
+
for ext in [".md", ".txt", ""]:
|
|
157
162
|
check_path = self.current_dir / source_dir / (target + ext)
|
|
158
163
|
if check_path.exists():
|
|
159
164
|
return str(check_path.relative_to(self.current_dir))
|
|
160
|
-
|
|
165
|
+
|
|
161
166
|
# Try in common directories
|
|
162
|
-
for subdir in [
|
|
163
|
-
for ext in [
|
|
167
|
+
for subdir in ["semantic", "episodic", "procedural"]:
|
|
168
|
+
for ext in [".md", ".txt", ""]:
|
|
164
169
|
check_path = self.current_dir / subdir / (target + ext)
|
|
165
170
|
if check_path.exists():
|
|
166
171
|
return str(check_path.relative_to(self.current_dir))
|
|
167
|
-
|
|
172
|
+
|
|
168
173
|
return None
|
|
169
|
-
|
|
170
|
-
def build_graph(
|
|
174
|
+
|
|
175
|
+
def build_graph(
|
|
176
|
+
self, include_similarity: bool = True, similarity_threshold: float = 0.7
|
|
177
|
+
) -> KnowledgeGraphData:
|
|
171
178
|
"""
|
|
172
179
|
Build the knowledge graph from memory files.
|
|
173
|
-
|
|
180
|
+
|
|
174
181
|
Args:
|
|
175
182
|
include_similarity: Include similarity-based edges
|
|
176
183
|
similarity_threshold: Minimum similarity for edges (0-1)
|
|
177
|
-
|
|
184
|
+
|
|
178
185
|
Returns:
|
|
179
186
|
KnowledgeGraphData with nodes and edges
|
|
180
187
|
"""
|
|
@@ -183,42 +190,42 @@ class KnowledgeGraphBuilder:
|
|
|
183
190
|
file_paths = []
|
|
184
191
|
file_contents = {}
|
|
185
192
|
file_tags = defaultdict(list)
|
|
186
|
-
|
|
193
|
+
|
|
187
194
|
# Collect all memory files
|
|
188
195
|
if not self.current_dir.exists():
|
|
189
196
|
return KnowledgeGraphData(nodes=[], edges=[])
|
|
190
|
-
|
|
191
|
-
for memory_file in self.current_dir.glob(
|
|
197
|
+
|
|
198
|
+
for memory_file in self.current_dir.glob("**/*.md"):
|
|
192
199
|
try:
|
|
193
200
|
rel_path = str(memory_file.relative_to(self.current_dir))
|
|
194
201
|
content = memory_file.read_text()
|
|
195
|
-
|
|
202
|
+
|
|
196
203
|
# Create node
|
|
197
204
|
memory_type = self._detect_memory_type(rel_path)
|
|
198
205
|
tags = self._extract_tags_from_frontmatter(content)
|
|
199
|
-
|
|
206
|
+
|
|
200
207
|
node = GraphNode(
|
|
201
208
|
id=rel_path,
|
|
202
209
|
label=memory_file.stem,
|
|
203
210
|
memory_type=memory_type,
|
|
204
211
|
size=len(content),
|
|
205
|
-
tags=tags
|
|
212
|
+
tags=tags,
|
|
206
213
|
)
|
|
207
214
|
nodes.append(node)
|
|
208
215
|
file_paths.append(rel_path)
|
|
209
216
|
file_contents[rel_path] = content
|
|
210
|
-
|
|
217
|
+
|
|
211
218
|
# Index tags
|
|
212
219
|
for tag in tags:
|
|
213
220
|
file_tags[tag].append(rel_path)
|
|
214
|
-
|
|
221
|
+
|
|
215
222
|
# Add to NetworkX graph if available
|
|
216
223
|
if self._graph is not None:
|
|
217
224
|
self._graph.add_node(rel_path, **node.to_dict())
|
|
218
|
-
|
|
225
|
+
|
|
219
226
|
except Exception:
|
|
220
227
|
continue
|
|
221
|
-
|
|
228
|
+
|
|
222
229
|
# Add wikilink edges
|
|
223
230
|
for source_path, content in file_contents.items():
|
|
224
231
|
links = self._extract_wikilinks(content)
|
|
@@ -226,62 +233,62 @@ class KnowledgeGraphBuilder:
|
|
|
226
233
|
target_path = self._normalize_link_target(target, source_path)
|
|
227
234
|
if target_path and target_path in file_contents:
|
|
228
235
|
edge = GraphEdge(
|
|
229
|
-
source=source_path,
|
|
230
|
-
target=target_path,
|
|
231
|
-
edge_type='reference',
|
|
232
|
-
weight=1.0
|
|
236
|
+
source=source_path, target=target_path, edge_type="reference", weight=1.0
|
|
233
237
|
)
|
|
234
238
|
edges.append(edge)
|
|
235
|
-
|
|
239
|
+
|
|
236
240
|
if self._graph is not None:
|
|
237
|
-
self._graph.add_edge(source_path, target_path, type=
|
|
238
|
-
|
|
241
|
+
self._graph.add_edge(source_path, target_path, type="reference", weight=1.0)
|
|
242
|
+
|
|
239
243
|
# Add tag-based edges
|
|
240
244
|
for tag, files in file_tags.items():
|
|
241
245
|
if len(files) > 1:
|
|
242
246
|
for i, file1 in enumerate(files):
|
|
243
|
-
for file2 in files[i+1:]:
|
|
247
|
+
for file2 in files[i + 1 :]:
|
|
244
248
|
edge = GraphEdge(
|
|
245
|
-
source=file1,
|
|
246
|
-
target=file2,
|
|
247
|
-
edge_type='same_topic',
|
|
248
|
-
weight=0.5
|
|
249
|
+
source=file1, target=file2, edge_type="same_topic", weight=0.5
|
|
249
250
|
)
|
|
250
251
|
edges.append(edge)
|
|
251
|
-
|
|
252
|
+
|
|
252
253
|
if self._graph is not None:
|
|
253
|
-
self._graph.add_edge(file1, file2, type=
|
|
254
|
-
|
|
254
|
+
self._graph.add_edge(file1, file2, type="same_topic", weight=0.5)
|
|
255
|
+
|
|
255
256
|
# Add similarity edges
|
|
256
257
|
if include_similarity and self.vector_store and len(file_paths) > 1:
|
|
257
258
|
try:
|
|
258
|
-
edges.extend(
|
|
259
|
+
edges.extend(
|
|
260
|
+
self._build_similarity_edges(file_paths, file_contents, similarity_threshold)
|
|
261
|
+
)
|
|
259
262
|
except Exception:
|
|
260
263
|
pass # Skip similarity if vector store fails
|
|
261
|
-
|
|
264
|
+
|
|
262
265
|
# Build metadata
|
|
263
266
|
metadata = {
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
267
|
+
"total_nodes": len(nodes),
|
|
268
|
+
"total_edges": len(edges),
|
|
269
|
+
"memory_types": {
|
|
270
|
+
"episodic": sum(1 for n in nodes if n.memory_type == "episodic"),
|
|
271
|
+
"semantic": sum(1 for n in nodes if n.memory_type == "semantic"),
|
|
272
|
+
"procedural": sum(1 for n in nodes if n.memory_type == "procedural"),
|
|
273
|
+
"other": sum(
|
|
274
|
+
1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
|
|
275
|
+
),
|
|
276
|
+
},
|
|
277
|
+
"edge_types": {
|
|
278
|
+
"reference": sum(1 for e in edges if e.edge_type == "reference"),
|
|
279
|
+
"similarity": sum(1 for e in edges if e.edge_type == "similarity"),
|
|
280
|
+
"same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
|
|
271
281
|
},
|
|
272
|
-
'edge_types': {
|
|
273
|
-
'reference': sum(1 for e in edges if e.edge_type == 'reference'),
|
|
274
|
-
'similarity': sum(1 for e in edges if e.edge_type == 'similarity'),
|
|
275
|
-
'same_topic': sum(1 for e in edges if e.edge_type == 'same_topic'),
|
|
276
|
-
}
|
|
277
282
|
}
|
|
278
|
-
|
|
283
|
+
|
|
279
284
|
return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
|
|
280
|
-
|
|
281
|
-
def _build_similarity_edges(
|
|
285
|
+
|
|
286
|
+
def _build_similarity_edges(
|
|
287
|
+
self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
|
|
288
|
+
) -> List[GraphEdge]:
|
|
282
289
|
"""Build edges based on semantic similarity."""
|
|
283
290
|
edges = []
|
|
284
|
-
|
|
291
|
+
|
|
285
292
|
# Get embeddings for all files
|
|
286
293
|
embeddings = {}
|
|
287
294
|
for path, content in file_contents.items():
|
|
@@ -292,90 +299,80 @@ class KnowledgeGraphBuilder:
|
|
|
292
299
|
embeddings[path] = emb
|
|
293
300
|
except Exception:
|
|
294
301
|
continue
|
|
295
|
-
|
|
302
|
+
|
|
296
303
|
# Compute pairwise similarities
|
|
297
304
|
import math
|
|
298
|
-
|
|
305
|
+
|
|
299
306
|
def cosine_similarity(a: List[float], b: List[float]) -> float:
|
|
300
|
-
dot = sum(x*y for x, y in zip(a, b))
|
|
301
|
-
norm_a = math.sqrt(sum(x*x for x in a))
|
|
302
|
-
norm_b = math.sqrt(sum(x*x for x in b))
|
|
307
|
+
dot = sum(x * y for x, y in zip(a, b))
|
|
308
|
+
norm_a = math.sqrt(sum(x * x for x in a))
|
|
309
|
+
norm_b = math.sqrt(sum(x * x for x in b))
|
|
303
310
|
if norm_a == 0 or norm_b == 0:
|
|
304
311
|
return 0
|
|
305
312
|
return dot / (norm_a * norm_b)
|
|
306
|
-
|
|
313
|
+
|
|
307
314
|
paths_list = list(embeddings.keys())
|
|
308
315
|
for i, path1 in enumerate(paths_list):
|
|
309
|
-
for path2 in paths_list[i+1:]:
|
|
316
|
+
for path2 in paths_list[i + 1 :]:
|
|
310
317
|
sim = cosine_similarity(embeddings[path1], embeddings[path2])
|
|
311
318
|
if sim >= threshold:
|
|
312
|
-
edge = GraphEdge(
|
|
313
|
-
source=path1,
|
|
314
|
-
target=path2,
|
|
315
|
-
edge_type='similarity',
|
|
316
|
-
weight=sim
|
|
317
|
-
)
|
|
319
|
+
edge = GraphEdge(source=path1, target=path2, edge_type="similarity", weight=sim)
|
|
318
320
|
edges.append(edge)
|
|
319
|
-
|
|
321
|
+
|
|
320
322
|
if self._graph is not None:
|
|
321
|
-
self._graph.add_edge(path1, path2, type=
|
|
322
|
-
|
|
323
|
+
self._graph.add_edge(path1, path2, type="similarity", weight=sim)
|
|
324
|
+
|
|
323
325
|
return edges
|
|
324
|
-
|
|
326
|
+
|
|
325
327
|
def find_isolated_nodes(self) -> List[str]:
|
|
326
328
|
"""Find nodes with no connections (knowledge islands)."""
|
|
327
329
|
if self._graph is None or len(self._graph) == 0:
|
|
328
330
|
return []
|
|
329
|
-
|
|
331
|
+
|
|
330
332
|
# Convert to undirected for analysis
|
|
331
333
|
undirected = self._graph.to_undirected()
|
|
332
334
|
return [node for node in undirected.nodes() if undirected.degree(node) == 0]
|
|
333
|
-
|
|
335
|
+
|
|
334
336
|
def find_potential_contradictions(self) -> List[Tuple[str, str, float]]:
|
|
335
337
|
"""
|
|
336
338
|
Find files that might have contradictory information.
|
|
337
|
-
|
|
339
|
+
|
|
338
340
|
Returns files in the same topic cluster with low similarity.
|
|
339
341
|
"""
|
|
340
342
|
if self._graph is None:
|
|
341
343
|
return []
|
|
342
|
-
|
|
344
|
+
|
|
343
345
|
contradictions = []
|
|
344
|
-
|
|
346
|
+
|
|
345
347
|
# Files connected by same_topic but with low similarity
|
|
346
348
|
for u, v, data in self._graph.edges(data=True):
|
|
347
|
-
if data.get(
|
|
349
|
+
if data.get("type") == "same_topic":
|
|
348
350
|
# Check if there's also a similarity edge
|
|
349
351
|
sim_edge = self._graph.get_edge_data(u, v)
|
|
350
|
-
if sim_edge and sim_edge.get(
|
|
351
|
-
if sim_edge.get(
|
|
352
|
-
contradictions.append((u, v, sim_edge.get(
|
|
353
|
-
|
|
352
|
+
if sim_edge and sim_edge.get("type") == "similarity":
|
|
353
|
+
if sim_edge.get("weight", 1.0) < 0.3:
|
|
354
|
+
contradictions.append((u, v, sim_edge.get("weight", 0)))
|
|
355
|
+
|
|
354
356
|
return contradictions
|
|
355
|
-
|
|
357
|
+
|
|
356
358
|
def export_for_d3(self) -> str:
|
|
357
359
|
"""Export graph in D3.js force-graph format."""
|
|
358
360
|
graph_data = self.build_graph()
|
|
359
|
-
|
|
361
|
+
|
|
360
362
|
d3_format = {
|
|
361
|
-
|
|
363
|
+
"nodes": [
|
|
362
364
|
{
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
365
|
+
"id": n.id,
|
|
366
|
+
"name": n.label,
|
|
367
|
+
"group": n.memory_type,
|
|
368
|
+
"size": min(20, max(5, n.size // 100)),
|
|
367
369
|
}
|
|
368
370
|
for n in graph_data.nodes
|
|
369
371
|
],
|
|
370
|
-
|
|
371
|
-
{
|
|
372
|
-
'source': e.source,
|
|
373
|
-
'target': e.target,
|
|
374
|
-
'type': e.edge_type,
|
|
375
|
-
'value': e.weight
|
|
376
|
-
}
|
|
372
|
+
"links": [
|
|
373
|
+
{"source": e.source, "target": e.target, "type": e.edge_type, "value": e.weight}
|
|
377
374
|
for e in graph_data.edges
|
|
378
|
-
]
|
|
375
|
+
],
|
|
379
376
|
}
|
|
380
|
-
|
|
377
|
+
|
|
381
378
|
return json.dumps(d3_format, indent=2)
|
memvcs/core/llm/anthropic_provider.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Anthropic (Claude) LLM provider."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional, List, Dict, Any
|
|
5
|
+
|
|
6
|
+
from .base import LLMProvider
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AnthropicProvider(LLMProvider):
|
|
10
|
+
"""Anthropic Claude provider. API key from ANTHROPIC_API_KEY."""
|
|
11
|
+
|
|
12
|
+
def __init__(self, model: Optional[str] = None):
|
|
13
|
+
self._model = model or os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307")
|
|
14
|
+
|
|
15
|
+
@property
|
|
16
|
+
def name(self) -> str:
|
|
17
|
+
return "anthropic"
|
|
18
|
+
|
|
19
|
+
def complete(
|
|
20
|
+
self,
|
|
21
|
+
messages: List[Dict[str, str]],
|
|
22
|
+
*,
|
|
23
|
+
model: Optional[str] = None,
|
|
24
|
+
max_tokens: int = 1024,
|
|
25
|
+
**kwargs: Any,
|
|
26
|
+
) -> str:
|
|
27
|
+
try:
|
|
28
|
+
import anthropic
|
|
29
|
+
except ImportError:
|
|
30
|
+
raise RuntimeError("Anthropic provider requires: pip install anthropic")
|
|
31
|
+
m = model or self._model
|
|
32
|
+
client = anthropic.Anthropic()
|
|
33
|
+
# Convert OpenAI-style messages to Anthropic (system + user/assistant)
|
|
34
|
+
system = ""
|
|
35
|
+
anthropic_messages = []
|
|
36
|
+
for msg in messages:
|
|
37
|
+
role = msg.get("role", "user")
|
|
38
|
+
content = msg.get("content", "")
|
|
39
|
+
if role == "system":
|
|
40
|
+
system = content
|
|
41
|
+
else:
|
|
42
|
+
anthropic_messages.append({"role": role, "content": content})
|
|
43
|
+
resp = client.messages.create(
|
|
44
|
+
model=m,
|
|
45
|
+
max_tokens=max_tokens,
|
|
46
|
+
system=system or None,
|
|
47
|
+
messages=anthropic_messages,
|
|
48
|
+
**kwargs,
|
|
49
|
+
)
|
|
50
|
+
return resp.content[0].text if resp.content else ""
|
memvcs/core/llm/base.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LLM provider interface for agmem.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Optional, List, Dict, Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LLMProvider(ABC):
|
|
10
|
+
"""Abstract LLM provider (complete(messages) -> text)."""
|
|
11
|
+
|
|
12
|
+
@abstractmethod
|
|
13
|
+
def complete(
|
|
14
|
+
self,
|
|
15
|
+
messages: List[Dict[str, str]],
|
|
16
|
+
*,
|
|
17
|
+
model: Optional[str] = None,
|
|
18
|
+
max_tokens: int = 1024,
|
|
19
|
+
**kwargs: Any,
|
|
20
|
+
) -> str:
|
|
21
|
+
"""Return completion text for messages. Raises on failure."""
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
@property
|
|
25
|
+
def name(self) -> str:
|
|
26
|
+
"""Provider name (e.g. openai, anthropic)."""
|
|
27
|
+
return "base"
|
memvcs/core/llm/factory.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""LLM provider factory: select by config or env."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional, Dict, Any
|
|
5
|
+
|
|
6
|
+
from .base import LLMProvider
|
|
7
|
+
from .openai_provider import OpenAIProvider
|
|
8
|
+
from .anthropic_provider import AnthropicProvider
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_provider(
|
|
12
|
+
provider_name: Optional[str] = None,
|
|
13
|
+
model: Optional[str] = None,
|
|
14
|
+
config: Optional[Dict[str, Any]] = None,
|
|
15
|
+
) -> Optional[LLMProvider]:
|
|
16
|
+
"""
|
|
17
|
+
Return LLM provider by name. Config may have llm_provider, llm_model.
|
|
18
|
+
Env: AGMEM_LLM_PROVIDER, OPENAI_API_KEY, ANTHROPIC_API_KEY.
|
|
19
|
+
"""
|
|
20
|
+
name = (
|
|
21
|
+
provider_name
|
|
22
|
+
or (config or {}).get("llm_provider")
|
|
23
|
+
or os.environ.get("AGMEM_LLM_PROVIDER", "openai")
|
|
24
|
+
)
|
|
25
|
+
m = model or (config or {}).get("llm_model")
|
|
26
|
+
if name == "openai":
|
|
27
|
+
return OpenAIProvider(model=m)
|
|
28
|
+
if name == "anthropic":
|
|
29
|
+
return AnthropicProvider(model=m)
|
|
30
|
+
return OpenAIProvider(model=m)
|
memvcs/core/llm/openai_provider.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""OpenAI LLM provider."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Optional, List, Dict, Any
|
|
5
|
+
|
|
6
|
+
from .base import LLMProvider
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class OpenAIProvider(LLMProvider):
|
|
10
|
+
"""OpenAI (GPT) provider. API key from OPENAI_API_KEY."""
|
|
11
|
+
|
|
12
|
+
def __init__(self, model: Optional[str] = None):
|
|
13
|
+
self._model = model or os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo")
|
|
14
|
+
|
|
15
|
+
@property
|
|
16
|
+
def name(self) -> str:
|
|
17
|
+
return "openai"
|
|
18
|
+
|
|
19
|
+
def complete(
|
|
20
|
+
self,
|
|
21
|
+
messages: List[Dict[str, str]],
|
|
22
|
+
*,
|
|
23
|
+
model: Optional[str] = None,
|
|
24
|
+
max_tokens: int = 1024,
|
|
25
|
+
**kwargs: Any,
|
|
26
|
+
) -> str:
|
|
27
|
+
import openai
|
|
28
|
+
|
|
29
|
+
m = model or self._model
|
|
30
|
+
response = openai.chat.completions.create(
|
|
31
|
+
model=m,
|
|
32
|
+
messages=messages,
|
|
33
|
+
max_tokens=max_tokens,
|
|
34
|
+
**kwargs,
|
|
35
|
+
)
|
|
36
|
+
return response.choices[0].message.content or ""
|