agmem 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
- memvcs/cli.py +10 -0
- memvcs/commands/add.py +6 -0
- memvcs/commands/audit.py +59 -0
- memvcs/commands/clone.py +7 -0
- memvcs/commands/daemon.py +45 -0
- memvcs/commands/distill.py +24 -0
- memvcs/commands/federated.py +59 -0
- memvcs/commands/fsck.py +31 -0
- memvcs/commands/garden.py +22 -0
- memvcs/commands/gc.py +66 -0
- memvcs/commands/merge.py +55 -1
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +27 -0
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/timeline.py +27 -0
- memvcs/commands/verify.py +74 -23
- memvcs/commands/when.py +27 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/compression_pipeline.py +157 -0
- memvcs/core/consistency.py +9 -9
- memvcs/core/crypto_verify.py +291 -0
- memvcs/core/distiller.py +47 -29
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +147 -0
- memvcs/core/gardener.py +47 -29
- memvcs/core/ipfs_remote.py +200 -0
- memvcs/core/knowledge_graph.py +77 -5
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +36 -23
- memvcs/core/objects.py +39 -19
- memvcs/core/pack.py +278 -0
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/remote.py +229 -3
- memvcs/core/repository.py +82 -2
- memvcs/core/temporal_index.py +9 -0
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +15 -1
- memvcs/core/zk_proofs.py +158 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/core/federated.py
ADDED
@@ -0,0 +1,147 @@
+"""
+Federated memory collaboration for agmem.
+
+Agents share model updates or aggregated summaries instead of raw episodic logs.
+Optional coordinator URL; optional differential privacy (Tier 3).
+"""
+
+import hashlib
+import json
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+
+from .config_loader import load_agmem_config
+
+
+def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
+    """Get federated config from repo/user config. Returns None if disabled."""
+    config = load_agmem_config(repo_root)
+    fed = config.get("federated") or {}
+    if not fed.get("enabled"):
+        return None
+    url = fed.get("coordinator_url")
+    if not url:
+        return None
+    out = {
+        "coordinator_url": url.rstrip("/"),
+        "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
+    }
+    dp = fed.get("differential_privacy") or config.get("differential_privacy") or {}
+    if dp.get("enabled"):
+        out["use_dp"] = True
+        out["dp_epsilon"] = float(dp.get("epsilon", 0.1))
+        out["dp_delta"] = float(dp.get("delta", 1e-5))
+    else:
+        out["use_dp"] = False
+    return out
+
+
+def _normalize_for_hash(text: str) -> str:
+    """Normalize text for hashing (no raw content sent)."""
+    return " ".join(text.strip().split())
+
+
+def _extract_topic_from_md(path: Path, content: str) -> str:
+    """Extract topic from frontmatter tags or first heading."""
+    if content.startswith("---"):
+        end = content.find("---", 3)
+        if end > 0:
+            try:
+                import yaml
+                fm = yaml.safe_load(content[3:end])
+                if isinstance(fm, dict):
+                    tags = fm.get("tags", [])
+                    if tags:
+                        return str(tags[0])[:50]
+            except (ImportError, Exception):
+                pass
+    first_line = content.strip().split("\n")[0] if content.strip() else ""
+    if first_line.startswith("#"):
+        return first_line.lstrip("#").strip()[:50] or "untitled"
+    return "untitled"
+
+
+def produce_local_summary(
+    repo_root: Path, memory_types: List[str], use_dp: bool = False, dp_epsilon: float = 0.1, dp_delta: float = 1e-5
+) -> Dict[str, Any]:
+    """
+    Produce a local summary from episodic/semantic data (no raw content).
+    Returns dict with topic counts and fact hashes suitable for coordinator.
+    """
+    current_dir = repo_root / "current"
+    summary = {"memory_types": memory_types, "topics": {}, "topic_hashes": {}, "fact_count": 0}
+    all_fact_hashes: List[str] = []
+
+    for mtype in memory_types:
+        d = current_dir / mtype
+        if not d.exists():
+            summary["topics"][mtype] = 0
+            summary["topic_hashes"][mtype] = []
+            continue
+        topic_to_count: Dict[str, int] = {}
+        topic_to_hashes: Dict[str, List[str]] = {}
+        for f in d.rglob("*.md"):
+            if not f.is_file():
+                continue
+            try:
+                content = f.read_text(encoding="utf-8", errors="replace")
+            except Exception:
+                continue
+            normalized = _normalize_for_hash(content)
+            if normalized:
+                h = hashlib.sha256(normalized.encode()).hexdigest()
+                all_fact_hashes.append(h)
+                topic = _extract_topic_from_md(f, content)
+                topic_to_count[topic] = topic_to_count.get(topic, 0) + 1
+                topic_to_hashes.setdefault(topic, []).append(h)
+        summary["topics"][mtype] = sum(topic_to_count.values())
+        summary["topic_hashes"][mtype] = list(topic_to_hashes.keys())
+        if mtype == "semantic":
+            summary["fact_count"] = len(all_fact_hashes)
+
+    if use_dp and dp_epsilon and dp_delta:
+        from .privacy_budget import add_noise
+        for mtype in summary["topics"]:
+            raw = summary["topics"][mtype]
+            summary["topics"][mtype] = max(0, int(round(add_noise(float(raw), 1.0, dp_epsilon, dp_delta))))
+        summary["fact_count"] = max(0, int(round(add_noise(float(summary["fact_count"]), 1.0, dp_epsilon, dp_delta))))
+
+    return summary
+
+
+def push_updates(repo_root: Path, summary: Dict[str, Any]) -> str:
+    """Send local summary to coordinator. Returns status message."""
+    cfg = get_federated_config(repo_root)
+    if not cfg:
+        return "Federated collaboration not configured"
+    url = cfg["coordinator_url"] + "/push"
+    try:
+        import urllib.request
+
+        req = urllib.request.Request(
+            url,
+            data=json.dumps(summary).encode(),
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            if resp.status in (200, 201):
+                return "Pushed updates to coordinator"
+            return f"Coordinator returned {resp.status}"
+    except Exception as e:
+        return f"Push failed: {e}"
+
+
+def pull_merged(repo_root: Path) -> Optional[Dict[str, Any]]:
+    """Pull merged summaries from coordinator. Returns merged data or None."""
+    cfg = get_federated_config(repo_root)
+    if not cfg:
+        return None
+    url = cfg["coordinator_url"] + "/pull"
+    try:
+        import urllib.request
+
+        with urllib.request.urlopen(url, timeout=30) as resp:
+            return json.loads(resp.read().decode())
+    except Exception:
+        return None
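The module above only ever transmits topic names, counts, and SHA-256 digests, never raw memory text. A minimal usage sketch of the new API (the repo path is hypothetical; the functions and config keys are the ones added above):

from pathlib import Path

from memvcs.core.federated import get_federated_config, produce_local_summary, push_updates, pull_merged

repo_root = Path("~/agent-mem").expanduser()  # hypothetical repo location
cfg = get_federated_config(repo_root)  # None unless federated.enabled and coordinator_url are set
if cfg:
    summary = produce_local_summary(
        repo_root,
        cfg["memory_types"],
        use_dp=cfg["use_dp"],
        dp_epsilon=cfg.get("dp_epsilon", 0.1),
        dp_delta=cfg.get("dp_delta", 1e-5),
    )
    print(push_updates(repo_root, summary))  # "Pushed updates to coordinator" on HTTP 200/201
    merged = pull_merged(repo_root)  # merged view from the coordinator, or None on any failure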
memvcs/core/gardener.py
CHANGED
@@ -43,6 +43,9 @@ class GardenerConfig:
     llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
     llm_model: Optional[str] = None
     auto_commit: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -284,37 +287,36 @@ class Gardener:
 
         combined = "\n---\n".join(contents)
 
-        # Try LLM summarization
-        if self.config.llm_provider
+        # Try LLM summarization (multi-provider)
+        if self.config.llm_provider and self.config.llm_model:
             try:
-
+                from .llm import get_provider
+
+                config = {
+                    "llm_provider": self.config.llm_provider,
+                    "llm_model": self.config.llm_model,
+                }
+                provider = get_provider(config=config)
+                if provider:
+                    return provider.complete(
+                        [
+                            {
+                                "role": "system",
+                                "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
+                            },
+                            {
+                                "role": "user",
+                                "content": f"Summarize these conversation logs about '{cluster.topic}' into 2-3 key insights:\n\n{combined[:4000]}",
+                            },
+                        ],
+                        max_tokens=500,
+                    )
             except Exception:
                 pass
 
         # Fall back to simple summary
         return self._simple_summary(cluster, contents)
 
-    def _summarize_with_openai(self, content: str, topic: str) -> str:
-        """Summarize using OpenAI API."""
-        import openai
-
-        response = openai.chat.completions.create(
-            model=self.config.llm_model or "gpt-3.5-turbo",
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
-                },
-                {
-                    "role": "user",
-                    "content": f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}",
-                },
-            ],
-            max_tokens=500,
-        )
-
-        return response.choices[0].message.content
-
     def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
         """Generate a simple summary without LLM."""
         return f"""# Insights: {cluster.topic.title()}
@@ -352,14 +354,20 @@ class Gardener:
         except ValueError:
             insight_path = self.semantic_dir / f"insight-{timestamp}.md"
 
-        # Generate frontmatter
+        # Generate frontmatter (optionally noised for differential privacy)
+        source_episodes = len(cluster.episodes)
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            source_episodes = max(0, int(round(add_noise(
+                float(source_episodes), 1.0, self.config.dp_epsilon, self.config.dp_delta
+            ))))
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "gardener",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "insight"],
-            "source_episodes":
+            "source_episodes": source_episodes,
         }
 
         # Write file
@@ -488,11 +496,21 @@ class Gardener:
         except Exception as e:
             print(f"Warning: Auto-commit failed: {e}")
 
+        clusters_found = len(clusters)
+        insights_generated = insights_written
+        episodes_archived = archived_count
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            sensitivity = 1.0
+            clusters_found = max(0, int(round(add_noise(float(clusters_found), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            insights_generated = max(0, int(round(add_noise(float(insights_generated), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+
         return GardenerResult(
             success=True,
-            clusters_found=
-            insights_generated=
-            episodes_archived=
+            clusters_found=clusters_found,
+            insights_generated=insights_generated,
+            episodes_archived=episodes_archived,
            commit_hash=commit_hash,
            message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
         )
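Both this file and federated.py call add_noise(value, sensitivity, epsilon, delta) from memvcs/core/privacy_budget.py, which is +63 lines in this release but not shown in the diff. As a rough sketch only, a Gaussian mechanism with the standard (epsilon, delta) calibration would match that call signature; the shipped implementation may differ:

import math
import random


def add_noise(value: float, sensitivity: float, epsilon: float, delta: float) -> float:
    """Sketch of an (epsilon, delta)-DP Gaussian mechanism; not the shipped code."""
    # Classic analytic bound: sigma >= sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon
    sigma = sensitivity * math.sqrt(2.0 * math.log(1.25 / delta)) / epsilon
    return value + random.gauss(0.0, sigma)

With sensitivity 1.0 (adding or removing one episode changes each count by at most 1), the callers clamp with max(0, int(round(...))), which is why the noised counts in GardenerResult and the insight frontmatter can legitimately differ from the true ones.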
memvcs/core/ipfs_remote.py
ADDED
@@ -0,0 +1,200 @@
+"""
+IPFS remote for agmem.
+
+Push/pull via CIDs using HTTP gateway (POST /api/v0/add, GET /ipfs/<cid>).
+Optional ipfshttpclient for local daemon.
+"""
+
+import json
+import struct
+import zlib
+from pathlib import Path
+from typing import Optional, Set, Dict, Tuple
+
+from .objects import ObjectStore
+from .remote import _collect_objects_from_commit
+
+# Type byte for bundle (same as pack)
+_TYPE_BLOB = 1
+_TYPE_TREE = 2
+_TYPE_COMMIT = 3
+_TYPE_TAG = 4
+_TYPE_TO_BYTE = {"blob": _TYPE_BLOB, "tree": _TYPE_TREE, "commit": _TYPE_COMMIT, "tag": _TYPE_TAG}
+_BYTE_TO_TYPE = {v: k for k, v in _TYPE_TO_BYTE.items()}
+
+
+def _get_object_type_and_content(store: ObjectStore, hash_id: str) -> Optional[Tuple[str, bytes]]:
+    """Return (obj_type, raw_content) for a hash, or None."""
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        content = store.retrieve(hash_id, obj_type)
+        if content is not None:
+            return (obj_type, content)
+    return None
+
+
+def _bundle_objects(store: ObjectStore, hash_ids: Set[str]) -> bytes:
+    """Bundle objects into a single byte blob: count + [hash(32) type(1) len(4) zlib_payload]."""
+    entries = []
+    for h in sorted(hash_ids):
+        pair = _get_object_type_and_content(store, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        h_bin = bytes.fromhex(h) if len(h) == 64 else h.encode().ljust(32)[:32]
+        entries.append((h_bin, _TYPE_TO_BYTE.get(obj_type, _TYPE_BLOB), compressed))
+    parts = [struct.pack(">I", len(entries))]
+    for h_bin, type_byte, compressed in entries:
+        parts.append(h_bin)
+        parts.append(bytes([type_byte]))
+        parts.append(struct.pack(">I", len(compressed)))
+        parts.append(compressed)
+    return b"".join(parts)
+
+
+def _unbundle_objects(data: bytes, objects_dir: Path) -> int:
+    """Unbundle and write loose objects. Returns count written."""
+    if len(data) < 4:
+        return 0
+    count = struct.unpack(">I", data[:4])[0]
+    offset = 4
+    written = 0
+    for _ in range(count):
+        if offset + 32 + 1 + 4 > len(data):
+            break
+        h_bin = data[offset : offset + 32]
+        offset += 32
+        type_byte = data[offset]
+        offset += 1
+        comp_len = struct.unpack(">I", data[offset : offset + 4])[0]
+        offset += 4
+        if offset + comp_len > len(data):
+            break
+        compressed = data[offset : offset + comp_len]
+        offset += comp_len
+        obj_type = _BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            continue
+        null_idx = full.index(b"\0")
+        # Validate header
+        prefix = full[:null_idx].decode()
+        if " " not in prefix:
+            continue
+        name, size_str = prefix.split(" ", 1)
+        hash_hex = h_bin.hex() if len(h_bin) == 32 else h_bin.decode().strip()
+        if len(hash_hex) < 4:
+            continue
+        obj_path = objects_dir / obj_type / hash_hex[:2] / hash_hex[2:]
+        obj_path.parent.mkdir(parents=True, exist_ok=True)
+        obj_path.write_bytes(compressed)
+        written += 1
+    return written
+
+
+def _add_to_ipfs_gateway(bundle: bytes, gateway_url: str) -> Optional[str]:
+    """POST bundle to IPFS gateway /api/v0/add (multipart). Returns CID or None."""
+    boundary = "----agmem-boundary-" + str(abs(hash(bundle)))[:12]
+    body = (
+        b"--" + boundary.encode() + b"\r\n"
+        b'Content-Disposition: form-data; name="file"; filename="agmem-bundle.bin"\r\n'
+        b"Content-Type: application/octet-stream\r\n\r\n"
+        + bundle + b"\r\n"
+        b"--" + boundary.encode() + b"--\r\n"
+    )
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/api/v0/add"
+        req = urllib.request.Request(url, data=body, method="POST")
+        req.add_header("Content-Type", "multipart/form-data; boundary=" + boundary)
+        req.add_header("Content-Length", str(len(body)))
+        with urllib.request.urlopen(req, timeout=120) as resp:
+            if resp.status != 200:
+                return None
+            data = json.loads(resp.read().decode())
+            return data.get("Hash") or data.get("Name")
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/api/v0/add"
+            r = requests.post(
+                url,
+                files={"file": ("agmem-bundle.bin", bundle, "application/octet-stream")},
+                timeout=120,
+            )
+            if r.status_code != 200:
+                return None
+            return r.json().get("Hash") or r.json().get("Name")
+        except Exception:
+            return None
+
+
+def push_to_ipfs(
+    objects_dir: Path,
+    branch: str,
+    commit_hash: str,
+    gateway_url: str = "https://ipfs.io",
+    store: Optional[ObjectStore] = None,
+) -> Optional[str]:
+    """
+    Push branch objects to IPFS and return root CID.
+    Uses gateway POST /api/v0/add (multipart).
+    """
+    if store is None:
+        store = ObjectStore(objects_dir)
+    try:
+        reachable = _collect_objects_from_commit(store, commit_hash)
+    except Exception:
+        return None
+    if not reachable:
+        return None
+    bundle = _bundle_objects(store, reachable)
+    return _add_to_ipfs_gateway(bundle, gateway_url)
+
+
+def pull_from_ipfs(
+    objects_dir: Path,
+    cid: str,
+    gateway_url: str = "https://ipfs.io",
+) -> bool:
+    """
+    Pull objects by CID from IPFS into objects_dir (loose objects).
+    Uses GET gateway_url/ipfs/<cid>.
+    """
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/ipfs/" + cid
+        req = urllib.request.Request(url, method="GET")
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            if resp.status != 200:
+                return False
+            data = resp.read()
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/ipfs/" + cid
+            r = requests.get(url, timeout=60)
+            if r.status_code != 200:
+                return False
+            data = r.content
+        except Exception:
+            return False
+    written = _unbundle_objects(data, objects_dir)
+    return written > 0
+
+
+def parse_ipfs_url(url: str) -> Optional[str]:
+    """Parse ipfs://<cid> or ipfs://<cid>/path. Returns CID or None."""
+    if not url.startswith("ipfs://"):
+        return None
+    rest = url[7:].lstrip("/")
+    return rest.split("/")[0] or None
memvcs/core/knowledge_graph.py
CHANGED
@@ -84,6 +84,14 @@ class KnowledgeGraphBuilder:
     1. Wikilinks: [[filename]] references
     2. Semantic similarity: Using embeddings
     3. Shared tags: Files with common tags
+    4. Co-occurrence: Files that mention the same entity (e.g. same section/session)
+    5. Causal: Phrases like "caused by", "because of" linking concepts (when derivable)
+    6. Entity: Person/place/thing links (simple keyword or pattern)
+
+    Incremental updates: To update when new files are added without full rebuild,
+    filter the file list to new/changed paths, run build_graph logic for that subset,
+    and merge new nodes/edges into the existing graph (or re-run build_graph; cost is
+    linear in file count).
     """
 
     # Pattern for wikilinks: [[target]] or [[target|display text]]
@@ -261,7 +269,22 @@ class KnowledgeGraphBuilder:
         except Exception:
             pass  # Skip similarity if vector store fails
 
+        # Add co-occurrence edges (files sharing entities)
+        try:
+            edges.extend(self._build_cooccurrence_edges(file_paths, file_contents))
+        except Exception:
+            pass
+
+        # Add causal edges (phrases like "caused by", "because of" linking to another file)
+        try:
+            edges.extend(self._build_causal_edges(file_contents))
+        except Exception:
+            pass
+
         # Build metadata
+        edge_type_counts = defaultdict(int)
+        for e in edges:
+            edge_type_counts[e.edge_type] += 1
         metadata = {
             "total_nodes": len(nodes),
             "total_edges": len(edges),
@@ -273,15 +296,64 @@ class KnowledgeGraphBuilder:
                     1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
                 ),
             },
-            "edge_types":
-                "reference": sum(1 for e in edges if e.edge_type == "reference"),
-                "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
-                "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
-            },
+            "edge_types": dict(edge_type_counts),
         }
 
         return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
 
+    def _extract_entities_simple(self, content: str) -> Set[str]:
+        """Extract simple entity tokens (capitalized words, key phrases) for co-occurrence."""
+        entities = set()
+        for word in re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", content):
+            if len(word) > 2:
+                entities.add(word.lower())
+        for phrase in ["user", "project", "agent", "memory", "preference", "workflow"]:
+            if phrase in content.lower():
+                entities.add(phrase)
+        return entities
+
+    def _build_cooccurrence_edges(
+        self, file_paths: List[str], file_contents: Dict[str, str]
+    ) -> List[GraphEdge]:
+        """Build edges between files that share at least one entity (co-occurrence)."""
+        file_entities: Dict[str, Set[str]] = {}
+        for path, content in file_contents.items():
+            file_entities[path] = self._extract_entities_simple(content)
+        edges = []
+        paths_list = list(file_paths)
+        for i, path1 in enumerate(paths_list):
+            for path2 in paths_list[i + 1 :]:
+                common = file_entities.get(path1, set()) & file_entities.get(path2, set())
+                if common:
+                    w = min(1.0, 0.3 + 0.1 * len(common))
+                    edge = GraphEdge(source=path1, target=path2, edge_type="co_occurrence", weight=w)
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(path1, path2, type="co_occurrence", weight=w)
+        return edges
+
+    def _build_causal_edges(self, file_contents: Dict[str, str]) -> List[GraphEdge]:
+        """Build edges when content has causal phrases linking to another file (e.g. caused by [[X]])."""
+        causal_phrases = re.compile(
+            r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
+            re.IGNORECASE,
+        )
+        edges = []
+        for source_path, content in file_contents.items():
+            for m in causal_phrases.finditer(content):
+                target = m.group(1) or m.group(2)
+                if not target:
+                    continue
+                target_path = self._normalize_link_target(target.strip(), source_path)
+                if target_path and target_path in file_contents and target_path != source_path:
+                    edge = GraphEdge(
+                        source=source_path, target=target_path, edge_type="causal", weight=0.7
+                    )
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(source_path, target_path, type="causal", weight=0.7)
+        return edges
+
     def _build_similarity_edges(
         self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
     ) -> List[GraphEdge]:
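The incremental-update note added to the docstring can be made concrete. A sketch of the merge step it describes, under stated assumptions (a subset rebuild already produced delta_nodes/delta_edges; GraphEdge.source and .target hold file paths as above; the node path attribute, called file_path here, is hypothetical):

from typing import List, Set, Tuple


def merge_incremental(existing_nodes: List, existing_edges: List,
                      delta_nodes: List, delta_edges: List,
                      changed_paths: Set[str]) -> Tuple[List, List]:
    """Replace everything touching changed files with the rebuilt subset (sketch)."""
    nodes = [n for n in existing_nodes if getattr(n, "file_path", None) not in changed_paths]
    edges = [e for e in existing_edges
             if e.source not in changed_paths and e.target not in changed_paths]
    return nodes + delta_nodes, edges + delta_edges

One caveat the docstring glosses over: edges between a changed file and an unchanged one are only rediscovered if the subset rebuild also scans the unchanged side, so the pairwise co-occurrence and similarity passes may need to include neighbors of changed files.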
memvcs/core/llm/anthropic_provider.py
ADDED
@@ -0,0 +1,50 @@
+"""Anthropic (Claude) LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class AnthropicProvider(LLMProvider):
+    """Anthropic Claude provider. API key from ANTHROPIC_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307")
+
+    @property
+    def name(self) -> str:
+        return "anthropic"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        try:
+            import anthropic
+        except ImportError:
+            raise RuntimeError("Anthropic provider requires: pip install anthropic")
+        m = model or self._model
+        client = anthropic.Anthropic()
+        # Convert OpenAI-style messages to Anthropic (system + user/assistant)
+        system = ""
+        anthropic_messages = []
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+            if role == "system":
+                system = content
+            else:
+                anthropic_messages.append({"role": role, "content": content})
+        resp = client.messages.create(
+            model=m,
+            max_tokens=max_tokens,
+            system=system or None,
+            messages=anthropic_messages,
+            **kwargs,
+        )
+        return resp.content[0].text if resp.content else ""
memvcs/core/llm/base.py
ADDED
@@ -0,0 +1,27 @@
+"""
+LLM provider interface for agmem.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict, Any
+
+
+class LLMProvider(ABC):
+    """Abstract LLM provider (complete(messages) -> text)."""
+
+    @abstractmethod
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        """Return completion text for messages. Raises on failure."""
+        pass
+
+    @property
+    def name(self) -> str:
+        """Provider name (e.g. openai, anthropic)."""
+        return "base"