agmem 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
  2. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
  3. memvcs/cli.py +10 -0
  4. memvcs/commands/add.py +6 -0
  5. memvcs/commands/audit.py +59 -0
  6. memvcs/commands/clone.py +7 -0
  7. memvcs/commands/daemon.py +45 -0
  8. memvcs/commands/distill.py +24 -0
  9. memvcs/commands/federated.py +59 -0
  10. memvcs/commands/fsck.py +31 -0
  11. memvcs/commands/garden.py +22 -0
  12. memvcs/commands/gc.py +66 -0
  13. memvcs/commands/merge.py +55 -1
  14. memvcs/commands/prove.py +66 -0
  15. memvcs/commands/pull.py +27 -0
  16. memvcs/commands/resolve.py +130 -0
  17. memvcs/commands/timeline.py +27 -0
  18. memvcs/commands/verify.py +74 -23
  19. memvcs/commands/when.py +27 -0
  20. memvcs/core/audit.py +124 -0
  21. memvcs/core/compression_pipeline.py +157 -0
  22. memvcs/core/consistency.py +9 -9
  23. memvcs/core/crypto_verify.py +291 -0
  24. memvcs/core/distiller.py +47 -29
  25. memvcs/core/encryption.py +169 -0
  26. memvcs/core/federated.py +147 -0
  27. memvcs/core/gardener.py +47 -29
  28. memvcs/core/ipfs_remote.py +200 -0
  29. memvcs/core/knowledge_graph.py +77 -5
  30. memvcs/core/llm/__init__.py +10 -0
  31. memvcs/core/llm/anthropic_provider.py +50 -0
  32. memvcs/core/llm/base.py +27 -0
  33. memvcs/core/llm/factory.py +30 -0
  34. memvcs/core/llm/openai_provider.py +36 -0
  35. memvcs/core/merge.py +36 -23
  36. memvcs/core/objects.py +39 -19
  37. memvcs/core/pack.py +278 -0
  38. memvcs/core/privacy_budget.py +63 -0
  39. memvcs/core/remote.py +229 -3
  40. memvcs/core/repository.py +82 -2
  41. memvcs/core/temporal_index.py +9 -0
  42. memvcs/core/trust.py +103 -0
  43. memvcs/core/vector_store.py +15 -1
  44. memvcs/core/zk_proofs.py +158 -0
  45. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
  46. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
  47. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
  48. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/core/federated.py ADDED
@@ -0,0 +1,147 @@
+ """
+ Federated memory collaboration for agmem.
+
+ Agents share model updates or aggregated summaries instead of raw episodic logs.
+ Optional coordinator URL; optional differential privacy (Tier 3).
+ """
+
+ import hashlib
+ import json
+ from pathlib import Path
+ from typing import Optional, List, Dict, Any
+
+ from .config_loader import load_agmem_config
+
+
+ def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
+     """Get federated config from repo/user config. Returns None if disabled."""
+     config = load_agmem_config(repo_root)
+     fed = config.get("federated") or {}
+     if not fed.get("enabled"):
+         return None
+     url = fed.get("coordinator_url")
+     if not url:
+         return None
+     out = {
+         "coordinator_url": url.rstrip("/"),
+         "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
+     }
+     dp = fed.get("differential_privacy") or config.get("differential_privacy") or {}
+     if dp.get("enabled"):
+         out["use_dp"] = True
+         out["dp_epsilon"] = float(dp.get("epsilon", 0.1))
+         out["dp_delta"] = float(dp.get("delta", 1e-5))
+     else:
+         out["use_dp"] = False
+     return out
+
+
+ def _normalize_for_hash(text: str) -> str:
+     """Normalize text for hashing (no raw content sent)."""
+     return " ".join(text.strip().split())
+
+
+ def _extract_topic_from_md(path: Path, content: str) -> str:
+     """Extract topic from frontmatter tags or first heading."""
+     if content.startswith("---"):
+         end = content.find("---", 3)
+         if end > 0:
+             try:
+                 import yaml
+                 fm = yaml.safe_load(content[3:end])
+                 if isinstance(fm, dict):
+                     tags = fm.get("tags", [])
+                     if tags:
+                         return str(tags[0])[:50]
+             except (ImportError, Exception):
+                 pass
+     first_line = content.strip().split("\n")[0] if content.strip() else ""
+     if first_line.startswith("#"):
+         return first_line.lstrip("#").strip()[:50] or "untitled"
+     return "untitled"
+
+
+ def produce_local_summary(
+     repo_root: Path, memory_types: List[str], use_dp: bool = False, dp_epsilon: float = 0.1, dp_delta: float = 1e-5
+ ) -> Dict[str, Any]:
+     """
+     Produce a local summary from episodic/semantic data (no raw content).
+     Returns dict with topic counts and fact hashes suitable for coordinator.
+     """
+     current_dir = repo_root / "current"
+     summary = {"memory_types": memory_types, "topics": {}, "topic_hashes": {}, "fact_count": 0}
+     all_fact_hashes: List[str] = []
+
+     for mtype in memory_types:
+         d = current_dir / mtype
+         if not d.exists():
+             summary["topics"][mtype] = 0
+             summary["topic_hashes"][mtype] = []
+             continue
+         topic_to_count: Dict[str, int] = {}
+         topic_to_hashes: Dict[str, List[str]] = {}
+         for f in d.rglob("*.md"):
+             if not f.is_file():
+                 continue
+             try:
+                 content = f.read_text(encoding="utf-8", errors="replace")
+             except Exception:
+                 continue
+             normalized = _normalize_for_hash(content)
+             if normalized:
+                 h = hashlib.sha256(normalized.encode()).hexdigest()
+                 all_fact_hashes.append(h)
+                 topic = _extract_topic_from_md(f, content)
+                 topic_to_count[topic] = topic_to_count.get(topic, 0) + 1
+                 topic_to_hashes.setdefault(topic, []).append(h)
+         summary["topics"][mtype] = sum(topic_to_count.values())
+         summary["topic_hashes"][mtype] = list(topic_to_hashes.keys())
+         if mtype == "semantic":
+             summary["fact_count"] = len(all_fact_hashes)
+
+     if use_dp and dp_epsilon and dp_delta:
+         from .privacy_budget import add_noise
+         for mtype in summary["topics"]:
+             raw = summary["topics"][mtype]
+             summary["topics"][mtype] = max(0, int(round(add_noise(float(raw), 1.0, dp_epsilon, dp_delta))))
+         summary["fact_count"] = max(0, int(round(add_noise(float(summary["fact_count"]), 1.0, dp_epsilon, dp_delta))))
+
+     return summary
+
+
+ def push_updates(repo_root: Path, summary: Dict[str, Any]) -> str:
+     """Send local summary to coordinator. Returns status message."""
+     cfg = get_federated_config(repo_root)
+     if not cfg:
+         return "Federated collaboration not configured"
+     url = cfg["coordinator_url"] + "/push"
+     try:
+         import urllib.request
+
+         req = urllib.request.Request(
+             url,
+             data=json.dumps(summary).encode(),
+             headers={"Content-Type": "application/json"},
+             method="POST",
+         )
+         with urllib.request.urlopen(req, timeout=30) as resp:
+             if resp.status in (200, 201):
+                 return "Pushed updates to coordinator"
+             return f"Coordinator returned {resp.status}"
+     except Exception as e:
+         return f"Push failed: {e}"
+
+
+ def pull_merged(repo_root: Path) -> Optional[Dict[str, Any]]:
+     """Pull merged summaries from coordinator. Returns merged data or None."""
+     cfg = get_federated_config(repo_root)
+     if not cfg:
+         return None
+     url = cfg["coordinator_url"] + "/pull"
+     try:
+         import urllib.request
+
+         with urllib.request.urlopen(url, timeout=30) as resp:
+             return json.loads(resp.read().decode())
+     except Exception:
+         return None
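
A minimal sketch (not shipped in the package) of how these federated helpers fit together, assuming an agmem config that enables `federated` and sets a `coordinator_url`; the repository path and printed strings are illustrative, and the shape of the pulled payload is defined by the coordinator, not by this module:

    # Illustrative wiring of memvcs.core.federated; the path is an assumption.
    from pathlib import Path

    from memvcs.core.federated import (
        get_federated_config,
        produce_local_summary,
        push_updates,
        pull_merged,
    )

    repo_root = Path("~/agent-memory").expanduser()  # hypothetical repo location

    cfg = get_federated_config(repo_root)
    if cfg is None:
        print("federated disabled or no coordinator_url configured")
    else:
        # Topic counts and SHA-256 fact hashes only -- raw markdown never leaves the repo.
        summary = produce_local_summary(
            repo_root,
            cfg["memory_types"],
            use_dp=cfg["use_dp"],
            dp_epsilon=cfg.get("dp_epsilon", 0.1),
            dp_delta=cfg.get("dp_delta", 1e-5),
        )
        print(push_updates(repo_root, summary))  # POSTs JSON to <coordinator_url>/push
        merged = pull_merged(repo_root)          # GETs <coordinator_url>/pull, or None
        print("coordinator payload:", merged)    # payload shape is coordinator-defined
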
memvcs/core/gardener.py CHANGED
@@ -43,6 +43,9 @@ class GardenerConfig:
      llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
      llm_model: Optional[str] = None
      auto_commit: bool = True
+     use_dp: bool = False
+     dp_epsilon: Optional[float] = None
+     dp_delta: Optional[float] = None
  
  
  @dataclass
@@ -284,37 +287,36 @@ class Gardener:
  
          combined = "\n---\n".join(contents)
  
-         # Try LLM summarization
-         if self.config.llm_provider == "openai" and self.config.llm_model:
+         # Try LLM summarization (multi-provider)
+         if self.config.llm_provider and self.config.llm_model:
              try:
-                 return self._summarize_with_openai(combined, cluster.topic)
+                 from .llm import get_provider
+
+                 config = {
+                     "llm_provider": self.config.llm_provider,
+                     "llm_model": self.config.llm_model,
+                 }
+                 provider = get_provider(config=config)
+                 if provider:
+                     return provider.complete(
+                         [
+                             {
+                                 "role": "system",
+                                 "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
+                             },
+                             {
+                                 "role": "user",
+                                 "content": f"Summarize these conversation logs about '{cluster.topic}' into 2-3 key insights:\n\n{combined[:4000]}",
+                             },
+                         ],
+                         max_tokens=500,
+                     )
              except Exception:
                  pass
  
          # Fall back to simple summary
          return self._simple_summary(cluster, contents)
  
-     def _summarize_with_openai(self, content: str, topic: str) -> str:
-         """Summarize using OpenAI API."""
-         import openai
-
-         response = openai.chat.completions.create(
-             model=self.config.llm_model or "gpt-3.5-turbo",
-             messages=[
-                 {
-                     "role": "system",
-                     "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
-                 },
-                 {
-                     "role": "user",
-                     "content": f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}",
-                 },
-             ],
-             max_tokens=500,
-         )
-
-         return response.choices[0].message.content
-
      def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
          """Generate a simple summary without LLM."""
          return f"""# Insights: {cluster.topic.title()}
@@ -352,14 +354,20 @@ class Gardener:
          except ValueError:
              insight_path = self.semantic_dir / f"insight-{timestamp}.md"
  
-         # Generate frontmatter
+         # Generate frontmatter (optionally noised for differential privacy)
+         source_episodes = len(cluster.episodes)
+         if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+             from .privacy_budget import add_noise
+             source_episodes = max(0, int(round(add_noise(
+                 float(source_episodes), 1.0, self.config.dp_epsilon, self.config.dp_delta
+             ))))
          frontmatter = {
              "schema_version": "1.0",
              "last_updated": datetime.utcnow().isoformat() + "Z",
              "source_agent_id": "gardener",
              "memory_type": "semantic",
              "tags": cluster.tags + ["auto-generated", "insight"],
-             "source_episodes": len(cluster.episodes),
+             "source_episodes": source_episodes,
          }
  
          # Write file
@@ -488,11 +496,21 @@ class Gardener:
          except Exception as e:
              print(f"Warning: Auto-commit failed: {e}")
  
+         clusters_found = len(clusters)
+         insights_generated = insights_written
+         episodes_archived = archived_count
+         if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+             from .privacy_budget import add_noise
+             sensitivity = 1.0
+             clusters_found = max(0, int(round(add_noise(float(clusters_found), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+             insights_generated = max(0, int(round(add_noise(float(insights_generated), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+             episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+
          return GardenerResult(
              success=True,
-             clusters_found=len(clusters),
-             insights_generated=insights_written,
-             episodes_archived=archived_count,
+             clusters_found=clusters_found,
+             insights_generated=insights_generated,
+             episodes_archived=episodes_archived,
              commit_hash=commit_hash,
              message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
          )
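
A sketch of the new differential-privacy fields on GardenerConfig; this assumes the config fields not shown in these hunks keep their defaults, and the provider/model strings are only examples:

    # Hypothetical configuration; only the fields visible in this diff are shown.
    from memvcs.core.gardener import GardenerConfig

    config = GardenerConfig(
        llm_provider="anthropic",             # routed through memvcs.core.llm.get_provider
        llm_model="claude-3-haiku-20240307",
        auto_commit=True,
        # New in 0.1.4: when enabled, privacy_budget.add_noise() perturbs the reported
        # counts (clusters_found, insights_generated, episodes_archived, and the
        # source_episodes frontmatter value) with sensitivity 1.0 before they are returned.
        use_dp=True,
        dp_epsilon=0.1,
        dp_delta=1e-5,
    )
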
memvcs/core/ipfs_remote.py ADDED
@@ -0,0 +1,200 @@
+ """
+ IPFS remote for agmem.
+
+ Push/pull via CIDs using HTTP gateway (POST /api/v0/add, GET /ipfs/<cid>).
+ Optional ipfshttpclient for local daemon.
+ """
+
+ import json
+ import struct
+ import zlib
+ from pathlib import Path
+ from typing import Optional, Set, Dict, Tuple
+
+ from .objects import ObjectStore
+ from .remote import _collect_objects_from_commit
+
+ # Type byte for bundle (same as pack)
+ _TYPE_BLOB = 1
+ _TYPE_TREE = 2
+ _TYPE_COMMIT = 3
+ _TYPE_TAG = 4
+ _TYPE_TO_BYTE = {"blob": _TYPE_BLOB, "tree": _TYPE_TREE, "commit": _TYPE_COMMIT, "tag": _TYPE_TAG}
+ _BYTE_TO_TYPE = {v: k for k, v in _TYPE_TO_BYTE.items()}
+
+
+ def _get_object_type_and_content(store: ObjectStore, hash_id: str) -> Optional[Tuple[str, bytes]]:
+     """Return (obj_type, raw_content) for a hash, or None."""
+     for obj_type in ["commit", "tree", "blob", "tag"]:
+         content = store.retrieve(hash_id, obj_type)
+         if content is not None:
+             return (obj_type, content)
+     return None
+
+
+ def _bundle_objects(store: ObjectStore, hash_ids: Set[str]) -> bytes:
+     """Bundle objects into a single byte blob: count + [hash(32) type(1) len(4) zlib_payload]."""
+     entries = []
+     for h in sorted(hash_ids):
+         pair = _get_object_type_and_content(store, h)
+         if pair is None:
+             continue
+         obj_type, content = pair
+         header = f"{obj_type} {len(content)}\0".encode()
+         full = header + content
+         compressed = zlib.compress(full)
+         h_bin = bytes.fromhex(h) if len(h) == 64 else h.encode().ljust(32)[:32]
+         entries.append((h_bin, _TYPE_TO_BYTE.get(obj_type, _TYPE_BLOB), compressed))
+     parts = [struct.pack(">I", len(entries))]
+     for h_bin, type_byte, compressed in entries:
+         parts.append(h_bin)
+         parts.append(bytes([type_byte]))
+         parts.append(struct.pack(">I", len(compressed)))
+         parts.append(compressed)
+     return b"".join(parts)
+
+
+ def _unbundle_objects(data: bytes, objects_dir: Path) -> int:
+     """Unbundle and write loose objects. Returns count written."""
+     if len(data) < 4:
+         return 0
+     count = struct.unpack(">I", data[:4])[0]
+     offset = 4
+     written = 0
+     for _ in range(count):
+         if offset + 32 + 1 + 4 > len(data):
+             break
+         h_bin = data[offset : offset + 32]
+         offset += 32
+         type_byte = data[offset]
+         offset += 1
+         comp_len = struct.unpack(">I", data[offset : offset + 4])[0]
+         offset += 4
+         if offset + comp_len > len(data):
+             break
+         compressed = data[offset : offset + comp_len]
+         offset += comp_len
+         obj_type = _BYTE_TO_TYPE.get(type_byte)
+         if obj_type is None:
+             continue
+         try:
+             full = zlib.decompress(compressed)
+         except Exception:
+             continue
+         null_idx = full.index(b"\0")
+         # Validate header
+         prefix = full[:null_idx].decode()
+         if " " not in prefix:
+             continue
+         name, size_str = prefix.split(" ", 1)
+         hash_hex = h_bin.hex() if len(h_bin) == 32 else h_bin.decode().strip()
+         if len(hash_hex) < 4:
+             continue
+         obj_path = objects_dir / obj_type / hash_hex[:2] / hash_hex[2:]
+         obj_path.parent.mkdir(parents=True, exist_ok=True)
+         obj_path.write_bytes(compressed)
+         written += 1
+     return written
+
+
+ def _add_to_ipfs_gateway(bundle: bytes, gateway_url: str) -> Optional[str]:
+     """POST bundle to IPFS gateway /api/v0/add (multipart). Returns CID or None."""
+     boundary = "----agmem-boundary-" + str(abs(hash(bundle)))[:12]
+     body = (
+         b"--" + boundary.encode() + b"\r\n"
+         b'Content-Disposition: form-data; name="file"; filename="agmem-bundle.bin"\r\n'
+         b"Content-Type: application/octet-stream\r\n\r\n"
+         + bundle + b"\r\n"
+         b"--" + boundary.encode() + b"--\r\n"
+     )
+     try:
+         import urllib.request
+
+         url = gateway_url.rstrip("/") + "/api/v0/add"
+         req = urllib.request.Request(url, data=body, method="POST")
+         req.add_header("Content-Type", "multipart/form-data; boundary=" + boundary)
+         req.add_header("Content-Length", str(len(body)))
+         with urllib.request.urlopen(req, timeout=120) as resp:
+             if resp.status != 200:
+                 return None
+             data = json.loads(resp.read().decode())
+             return data.get("Hash") or data.get("Name")
+     except Exception:
+         try:
+             import requests
+
+             url = gateway_url.rstrip("/") + "/api/v0/add"
+             r = requests.post(
+                 url,
+                 files={"file": ("agmem-bundle.bin", bundle, "application/octet-stream")},
+                 timeout=120,
+             )
+             if r.status_code != 200:
+                 return None
+             return r.json().get("Hash") or r.json().get("Name")
+         except Exception:
+             return None
+
+
+ def push_to_ipfs(
+     objects_dir: Path,
+     branch: str,
+     commit_hash: str,
+     gateway_url: str = "https://ipfs.io",
+     store: Optional[ObjectStore] = None,
+ ) -> Optional[str]:
+     """
+     Push branch objects to IPFS and return root CID.
+     Uses gateway POST /api/v0/add (multipart).
+     """
+     if store is None:
+         store = ObjectStore(objects_dir)
+     try:
+         reachable = _collect_objects_from_commit(store, commit_hash)
+     except Exception:
+         return None
+     if not reachable:
+         return None
+     bundle = _bundle_objects(store, reachable)
+     return _add_to_ipfs_gateway(bundle, gateway_url)
+
+
+ def pull_from_ipfs(
+     objects_dir: Path,
+     cid: str,
+     gateway_url: str = "https://ipfs.io",
+ ) -> bool:
+     """
+     Pull objects by CID from IPFS into objects_dir (loose objects).
+     Uses GET gateway_url/ipfs/<cid>.
+     """
+     try:
+         import urllib.request
+
+         url = gateway_url.rstrip("/") + "/ipfs/" + cid
+         req = urllib.request.Request(url, method="GET")
+         with urllib.request.urlopen(req, timeout=60) as resp:
+             if resp.status != 200:
+                 return False
+             data = resp.read()
+     except Exception:
+         try:
+             import requests
+
+             url = gateway_url.rstrip("/") + "/ipfs/" + cid
+             r = requests.get(url, timeout=60)
+             if r.status_code != 200:
+                 return False
+             data = r.content
+         except Exception:
+             return False
+     written = _unbundle_objects(data, objects_dir)
+     return written > 0
+
+
+ def parse_ipfs_url(url: str) -> Optional[str]:
+     """Parse ipfs://<cid> or ipfs://<cid>/path. Returns CID or None."""
+     if not url.startswith("ipfs://"):
+         return None
+     rest = url[7:].lstrip("/")
+     return rest.split("/")[0] or None
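
A hypothetical round trip through the new IPFS remote. The objects directory, commit hash, and gateway URLs below are placeholders; note that POST /api/v0/add is the IPFS node HTTP API (typically a local Kubo daemon on port 5001), so a read-only public gateway will usually serve the GET side but not the add:

    # Placeholder values throughout; only the function signatures come from the diff.
    from pathlib import Path

    from memvcs.core.ipfs_remote import push_to_ipfs, pull_from_ipfs, parse_ipfs_url

    objects_dir = Path(".mem/objects")    # assumed object-store location
    head_commit = "<64-hex commit hash>"  # whatever the branch ref points at

    # Bundle every object reachable from the commit and add it to IPFS.
    cid = push_to_ipfs(objects_dir, "main", head_commit, gateway_url="http://127.0.0.1:5001")
    if cid:
        print(f"share as ipfs://{cid}")

        # Elsewhere: resolve the URL back to a CID and unpack loose objects.
        remote_cid = parse_ipfs_url(f"ipfs://{cid}")
        if remote_cid and pull_from_ipfs(objects_dir, remote_cid, gateway_url="https://ipfs.io"):
            print("objects restored from IPFS")
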
memvcs/core/knowledge_graph.py CHANGED
@@ -84,6 +84,14 @@ class KnowledgeGraphBuilder:
      1. Wikilinks: [[filename]] references
      2. Semantic similarity: Using embeddings
      3. Shared tags: Files with common tags
+     4. Co-occurrence: Files that mention the same entity (e.g. same section/session)
+     5. Causal: Phrases like "caused by", "because of" linking concepts (when derivable)
+     6. Entity: Person/place/thing links (simple keyword or pattern)
+
+     Incremental updates: To update when new files are added without full rebuild,
+     filter the file list to new/changed paths, run build_graph logic for that subset,
+     and merge new nodes/edges into the existing graph (or re-run build_graph; cost is
+     linear in file count).
      """
  
      # Pattern for wikilinks: [[target]] or [[target|display text]]
@@ -261,7 +269,22 @@ class KnowledgeGraphBuilder:
          except Exception:
              pass  # Skip similarity if vector store fails
  
+         # Add co-occurrence edges (files sharing entities)
+         try:
+             edges.extend(self._build_cooccurrence_edges(file_paths, file_contents))
+         except Exception:
+             pass
+
+         # Add causal edges (phrases like "caused by", "because of" linking to another file)
+         try:
+             edges.extend(self._build_causal_edges(file_contents))
+         except Exception:
+             pass
+
          # Build metadata
+         edge_type_counts = defaultdict(int)
+         for e in edges:
+             edge_type_counts[e.edge_type] += 1
          metadata = {
              "total_nodes": len(nodes),
              "total_edges": len(edges),
@@ -273,15 +296,64 @@ class KnowledgeGraphBuilder:
                      1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
                  ),
              },
-             "edge_types": {
-                 "reference": sum(1 for e in edges if e.edge_type == "reference"),
-                 "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
-                 "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
-             },
+             "edge_types": dict(edge_type_counts),
          }
  
          return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
  
+     def _extract_entities_simple(self, content: str) -> Set[str]:
+         """Extract simple entity tokens (capitalized words, key phrases) for co-occurrence."""
+         entities = set()
+         for word in re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", content):
+             if len(word) > 2:
+                 entities.add(word.lower())
+         for phrase in ["user", "project", "agent", "memory", "preference", "workflow"]:
+             if phrase in content.lower():
+                 entities.add(phrase)
+         return entities
+
+     def _build_cooccurrence_edges(
+         self, file_paths: List[str], file_contents: Dict[str, str]
+     ) -> List[GraphEdge]:
+         """Build edges between files that share at least one entity (co-occurrence)."""
+         file_entities: Dict[str, Set[str]] = {}
+         for path, content in file_contents.items():
+             file_entities[path] = self._extract_entities_simple(content)
+         edges = []
+         paths_list = list(file_paths)
+         for i, path1 in enumerate(paths_list):
+             for path2 in paths_list[i + 1 :]:
+                 common = file_entities.get(path1, set()) & file_entities.get(path2, set())
+                 if common:
+                     w = min(1.0, 0.3 + 0.1 * len(common))
+                     edge = GraphEdge(source=path1, target=path2, edge_type="co_occurrence", weight=w)
+                     edges.append(edge)
+                     if self._graph is not None:
+                         self._graph.add_edge(path1, path2, type="co_occurrence", weight=w)
+         return edges
+
+     def _build_causal_edges(self, file_contents: Dict[str, str]) -> List[GraphEdge]:
+         """Build edges when content has causal phrases linking to another file (e.g. caused by [[X]])."""
+         causal_phrases = re.compile(
+             r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
+             re.IGNORECASE,
+         )
+         edges = []
+         for source_path, content in file_contents.items():
+             for m in causal_phrases.finditer(content):
+                 target = m.group(1) or m.group(2)
+                 if not target:
+                     continue
+                 target_path = self._normalize_link_target(target.strip(), source_path)
+                 if target_path and target_path in file_contents and target_path != source_path:
+                     edge = GraphEdge(
+                         source=source_path, target=target_path, edge_type="causal", weight=0.7
+                     )
+                     edges.append(edge)
+                     if self._graph is not None:
+                         self._graph.add_edge(source_path, target_path, type="causal", weight=0.7)
+         return edges
+
      def _build_similarity_edges(
          self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
      ) -> List[GraphEdge]:
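
For reference, the causal-edge pattern above can be exercised on its own; the sample sentence is invented, and in the builder each match only becomes an edge when the captured target resolves (via _normalize_link_target) to another file in the repository:

    # Stand-alone check of the regex used by _build_causal_edges (sample text is made up).
    import re

    causal_phrases = re.compile(
        r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
        re.IGNORECASE,
    )

    text = "The outage was caused by [[session-bug]] and, because of caching, led to stale reads."
    print([m.group(1) or m.group(2) for m in causal_phrases.finditer(text)])
    # -> ['session-bug', 'caching', 'stale']
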
memvcs/core/llm/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ Multi-provider LLM integration for agmem.
+
+ Abstract interface; implementations: OpenAI, Anthropic, Ollama, custom HTTP.
+ """
+
+ from .base import LLMProvider
+ from .factory import get_provider
+
+ __all__ = ["LLMProvider", "get_provider"]
memvcs/core/llm/anthropic_provider.py ADDED
@@ -0,0 +1,50 @@
+ """Anthropic (Claude) LLM provider."""
+
+ import os
+ from typing import Optional, List, Dict, Any
+
+ from .base import LLMProvider
+
+
+ class AnthropicProvider(LLMProvider):
+     """Anthropic Claude provider. API key from ANTHROPIC_API_KEY."""
+
+     def __init__(self, model: Optional[str] = None):
+         self._model = model or os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307")
+
+     @property
+     def name(self) -> str:
+         return "anthropic"
+
+     def complete(
+         self,
+         messages: List[Dict[str, str]],
+         *,
+         model: Optional[str] = None,
+         max_tokens: int = 1024,
+         **kwargs: Any,
+     ) -> str:
+         try:
+             import anthropic
+         except ImportError:
+             raise RuntimeError("Anthropic provider requires: pip install anthropic")
+         m = model or self._model
+         client = anthropic.Anthropic()
+         # Convert OpenAI-style messages to Anthropic (system + user/assistant)
+         system = ""
+         anthropic_messages = []
+         for msg in messages:
+             role = msg.get("role", "user")
+             content = msg.get("content", "")
+             if role == "system":
+                 system = content
+             else:
+                 anthropic_messages.append({"role": role, "content": content})
+         resp = client.messages.create(
+             model=m,
+             max_tokens=max_tokens,
+             system=system or None,
+             messages=anthropic_messages,
+             **kwargs,
+         )
+         return resp.content[0].text if resp.content else ""
memvcs/core/llm/base.py ADDED
@@ -0,0 +1,27 @@
+ """
+ LLM provider interface for agmem.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Optional, List, Dict, Any
+
+
+ class LLMProvider(ABC):
+     """Abstract LLM provider (complete(messages) -> text)."""
+
+     @abstractmethod
+     def complete(
+         self,
+         messages: List[Dict[str, str]],
+         *,
+         model: Optional[str] = None,
+         max_tokens: int = 1024,
+         **kwargs: Any,
+     ) -> str:
+         """Return completion text for messages. Raises on failure."""
+         pass
+
+     @property
+     def name(self) -> str:
+         """Provider name (e.g. openai, anthropic)."""
+         return "base"