agmem 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/METADATA +24 -18
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/RECORD +25 -24
- memvcs/commands/daemon.py +21 -3
- memvcs/commands/distill.py +10 -2
- memvcs/commands/federated.py +7 -1
- memvcs/commands/garden.py +10 -2
- memvcs/commands/gc.py +18 -1
- memvcs/commands/prove.py +4 -2
- memvcs/commands/timeline.py +28 -0
- memvcs/commands/when.py +28 -0
- memvcs/core/compression_pipeline.py +165 -0
- memvcs/core/crypto_verify.py +12 -1
- memvcs/core/distiller.py +70 -4
- memvcs/core/federated.py +80 -9
- memvcs/core/gardener.py +80 -5
- memvcs/core/ipfs_remote.py +168 -8
- memvcs/core/knowledge_graph.py +79 -6
- memvcs/core/objects.py +33 -21
- memvcs/core/pack.py +201 -1
- memvcs/core/remote.py +200 -3
- memvcs/core/zk_proofs.py +145 -11
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/WHEEL +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.5.dist-info}/top_level.txt +0 -0
memvcs/core/crypto_verify.py
CHANGED
@@ -12,7 +12,7 @@ import os
 from pathlib import Path
 from typing import Optional, List, Tuple, Any, Dict
 
-from .objects import ObjectStore, Tree, Commit
+from .objects import ObjectStore, Tree, Commit, Blob
 
 # Ed25519 via cryptography (optional)
 try:
@@ -239,6 +239,17 @@ def verify_commit(
     stored_sig = (commit.metadata or {}).get("signature")
     if not stored_root:
         return (False, "commit has no merkle_root (unverified)")
+
+    # Verify that blob objects can be loaded successfully (detects tampering in compressed/encrypted content)
+    blob_hashes = _collect_blob_hashes_from_tree(store, commit.tree)
+    for blob_hash in blob_hashes:
+        try:
+            blob = Blob.load(store, blob_hash)
+            if blob is None:
+                return (False, f"blob {blob_hash[:8]} corrupted or missing")
+        except Exception as e:
+            return (False, f"merkle_root mismatch (commit tampered)")
+
     computed_root = build_merkle_root_for_commit(store, commit_hash)
     if not computed_root:
         return (False, "could not build Merkle tree (missing tree/blobs)")
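The new check calls `_collect_blob_hashes_from_tree`, which is referenced but not shown in this diff. A minimal sketch of what such a tree walk typically looks like, assuming a `Tree.load` loader and entries exposing `entry_type`/`hash` fields (both assumptions, since the Tree API is not part of this diff):

```python
# Hypothetical sketch only -- the real helper lives in crypto_verify.py and is not in this diff.
from typing import Set


def collect_blob_hashes_from_tree(store, tree_hash: str) -> Set[str]:
    """Recursively walk a tree and gather every blob hash it references."""
    hashes: Set[str] = set()
    tree = Tree.load(store, tree_hash)  # assumed loader, mirroring Blob.load used above
    if tree is None:
        return hashes
    for entry in tree.entries:  # assumed entry objects with entry_type and hash fields
        if entry.entry_type == "blob":
            hashes.add(entry.hash)
        elif entry.entry_type == "tree":
            hashes |= collect_blob_hashes_from_tree(store, entry.hash)
    return hashes
```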
memvcs/core/distiller.py
CHANGED
@@ -35,6 +35,9 @@ class DistillerConfig:
     llm_provider: Optional[str] = None
     llm_model: Optional[str] = None
     create_safety_branch: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -160,13 +163,25 @@ class Distiller:
         except ValueError:
             out_path = self.target_dir / f"consolidated-{ts}.md"
 
+        confidence_score = self.config.extraction_confidence_threshold
+        if (
+            self.config.use_dp
+            and self.config.dp_epsilon is not None
+            and self.config.dp_delta is not None
+        ):
+            from .privacy_budget import add_noise
+
+            confidence_score = add_noise(
+                confidence_score, 0.1, self.config.dp_epsilon, self.config.dp_delta
+            )
+            confidence_score = max(0.0, min(1.0, confidence_score))
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "distiller",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "consolidated"],
-            "confidence_score":
+            "confidence_score": confidence_score,
         }
         body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
         if YAML_AVAILABLE:
@@ -266,11 +281,62 @@ class Distiller:
         except Exception:
             pass
 
+        clusters_processed = len(clusters)
+        facts_extracted = facts_count
+        episodes_archived = archived
+        if (
+            self.config.use_dp
+            and self.config.dp_epsilon is not None
+            and self.config.dp_delta is not None
+        ):
+            from .privacy_budget import add_noise
+
+            sensitivity = 1.0
+            clusters_processed = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(clusters_processed),
+                            sensitivity,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
+            facts_extracted = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(facts_extracted),
+                            sensitivity,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
+            episodes_archived = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(episodes_archived),
+                            sensitivity,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
+
         return DistillerResult(
             success=True,
-            clusters_processed=
-            facts_extracted=
-            episodes_archived=
+            clusters_processed=clusters_processed,
+            facts_extracted=facts_extracted,
+            episodes_archived=episodes_archived,
             branch_created=branch_name,
             commit_hash=commit_hash,
             message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
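Both hunks import `add_noise` from `.privacy_budget`, a module not included in this diff. A minimal sketch of a Gaussian-mechanism `add_noise` matching the call signature used here, `add_noise(value, sensitivity, epsilon, delta)`; the shipped implementation may calibrate the noise differently:

```python
# Sketch only: memvcs/core/privacy_budget.py is not shown in this diff, so this is an
# assumed implementation that merely matches the call signature used above.
import math
import random


def add_noise(value: float, sensitivity: float, epsilon: float, delta: float) -> float:
    """Return value plus Gaussian noise calibrated for (epsilon, delta)-differential privacy."""
    # Classic Gaussian-mechanism calibration: sigma = sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon
    sigma = sensitivity * math.sqrt(2.0 * math.log(1.25 / delta)) / epsilon
    return value + random.gauss(0.0, sigma)
```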
memvcs/core/federated.py
CHANGED
@@ -5,6 +5,7 @@ Agents share model updates or aggregated summaries instead of raw episodic logs.
 Optional coordinator URL; optional differential privacy (Tier 3).
 """
 
+import hashlib
 import json
 from pathlib import Path
 from typing import Optional, List, Dict, Any
@@ -21,30 +22,100 @@ def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
     url = fed.get("coordinator_url")
     if not url:
         return None
-
+    out = {
         "coordinator_url": url.rstrip("/"),
         "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
     }
+    dp = fed.get("differential_privacy") or config.get("differential_privacy") or {}
+    if dp.get("enabled"):
+        out["use_dp"] = True
+        out["dp_epsilon"] = float(dp.get("epsilon", 0.1))
+        out["dp_delta"] = float(dp.get("delta", 1e-5))
+    else:
+        out["use_dp"] = False
+    return out
 
 
-def
+def _normalize_for_hash(text: str) -> str:
+    """Normalize text for hashing (no raw content sent)."""
+    return " ".join(text.strip().split())
+
+
+def _extract_topic_from_md(path: Path, content: str) -> str:
+    """Extract topic from frontmatter tags or first heading."""
+    if content.startswith("---"):
+        end = content.find("---", 3)
+        if end > 0:
+            try:
+                import yaml
+
+                fm = yaml.safe_load(content[3:end])
+                if isinstance(fm, dict):
+                    tags = fm.get("tags", [])
+                    if tags:
+                        return str(tags[0])[:50]
+            except (ImportError, Exception):
+                pass
+    first_line = content.strip().split("\n")[0] if content.strip() else ""
+    if first_line.startswith("#"):
+        return first_line.lstrip("#").strip()[:50] or "untitled"
+    return "untitled"
+
+
+def produce_local_summary(
+    repo_root: Path,
+    memory_types: List[str],
+    use_dp: bool = False,
+    dp_epsilon: float = 0.1,
+    dp_delta: float = 1e-5,
+) -> Dict[str, Any]:
     """
     Produce a local summary from episodic/semantic data (no raw content).
-    Returns dict
+    Returns dict with topic counts and fact hashes suitable for coordinator.
     """
     current_dir = repo_root / "current"
-    summary = {"memory_types": memory_types, "topics": {}, "fact_count": 0}
+    summary = {"memory_types": memory_types, "topics": {}, "topic_hashes": {}, "fact_count": 0}
+    all_fact_hashes: List[str] = []
+
     for mtype in memory_types:
         d = current_dir / mtype
         if not d.exists():
+            summary["topics"][mtype] = 0
+            summary["topic_hashes"][mtype] = []
             continue
-
+        topic_to_count: Dict[str, int] = {}
+        topic_to_hashes: Dict[str, List[str]] = {}
         for f in d.rglob("*.md"):
-            if f.is_file():
-
-
+            if not f.is_file():
+                continue
+            try:
+                content = f.read_text(encoding="utf-8", errors="replace")
+            except Exception:
+                continue
+            normalized = _normalize_for_hash(content)
+            if normalized:
+                h = hashlib.sha256(normalized.encode()).hexdigest()
+                all_fact_hashes.append(h)
+                topic = _extract_topic_from_md(f, content)
+                topic_to_count[topic] = topic_to_count.get(topic, 0) + 1
+                topic_to_hashes.setdefault(topic, []).append(h)
+        summary["topics"][mtype] = sum(topic_to_count.values())
+        summary["topic_hashes"][mtype] = list(topic_to_hashes.keys())
         if mtype == "semantic":
-            summary["fact_count"] =
+            summary["fact_count"] = len(all_fact_hashes)
+
+    if use_dp and dp_epsilon and dp_delta:
+        from .privacy_budget import add_noise
+
+        for mtype in summary["topics"]:
+            raw = summary["topics"][mtype]
+            summary["topics"][mtype] = max(
+                0, int(round(add_noise(float(raw), 1.0, dp_epsilon, dp_delta)))
+            )
+        summary["fact_count"] = max(
+            0, int(round(add_noise(float(summary["fact_count"]), 1.0, dp_epsilon, dp_delta)))
+        )
+
     return summary
 
 
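A usage sketch for the new summary API, following the `produce_local_summary` signature above; the repository path is illustrative:

```python
# Illustrative call: the repo path is made up, the keyword arguments mirror the new signature.
from pathlib import Path

from memvcs.core.federated import produce_local_summary

summary = produce_local_summary(
    repo_root=Path("/path/to/agent-repo"),
    memory_types=["episodic", "semantic"],
    use_dp=True,          # per-type counts and fact_count are noised before leaving the agent
    dp_epsilon=0.1,
    dp_delta=1e-5,
)
print(summary["topics"], summary["fact_count"])  # aggregate counts and topic keys only -- no raw memory content
```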
memvcs/core/gardener.py
CHANGED
@@ -43,6 +43,9 @@ class GardenerConfig:
     llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
     llm_model: Optional[str] = None
     auto_commit: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -351,14 +354,35 @@ class Gardener:
         except ValueError:
             insight_path = self.semantic_dir / f"insight-{timestamp}.md"
 
-        # Generate frontmatter
+        # Generate frontmatter (optionally noised for differential privacy)
+        source_episodes = len(cluster.episodes)
+        if (
+            self.config.use_dp
+            and self.config.dp_epsilon is not None
+            and self.config.dp_delta is not None
+        ):
+            from .privacy_budget import add_noise
+
+            source_episodes = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(source_episodes),
+                            1.0,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "gardener",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "insight"],
-            "source_episodes":
+            "source_episodes": source_episodes,
         }
 
         # Write file
@@ -487,11 +511,62 @@ class Gardener:
         except Exception as e:
             print(f"Warning: Auto-commit failed: {e}")
 
+        clusters_found = len(clusters)
+        insights_generated = insights_written
+        episodes_archived = archived_count
+        if (
+            self.config.use_dp
+            and self.config.dp_epsilon is not None
+            and self.config.dp_delta is not None
+        ):
+            from .privacy_budget import add_noise
+
+            sensitivity = 1.0
+            clusters_found = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(clusters_found),
+                            sensitivity,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
+            insights_generated = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(insights_generated),
+                            sensitivity,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
+            episodes_archived = max(
+                0,
+                int(
+                    round(
+                        add_noise(
+                            float(episodes_archived),
+                            sensitivity,
+                            self.config.dp_epsilon,
+                            self.config.dp_delta,
+                        )
+                    )
+                ),
+            )
+
         return GardenerResult(
             success=True,
-            clusters_found=
-            insights_generated=
-            episodes_archived=
+            clusters_found=clusters_found,
+            insights_generated=insights_generated,
+            episodes_archived=episodes_archived,
             commit_hash=commit_hash,
             message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
         )
memvcs/core/ipfs_remote.py
CHANGED
@@ -1,25 +1,161 @@
 """
-IPFS remote for agmem
+IPFS remote for agmem.
 
-Push/pull via CIDs
-
+Push/pull via CIDs using HTTP gateway (POST /api/v0/add, GET /ipfs/<cid>).
+Optional ipfshttpclient for local daemon.
 """
 
+import json
+import struct
+import zlib
 from pathlib import Path
-from typing import Optional, Set
+from typing import Optional, Set, Dict, Tuple
 
 from .objects import ObjectStore
 from .remote import _collect_objects_from_commit
 
+# Type byte for bundle (same as pack)
+_TYPE_BLOB = 1
+_TYPE_TREE = 2
+_TYPE_COMMIT = 3
+_TYPE_TAG = 4
+_TYPE_TO_BYTE = {"blob": _TYPE_BLOB, "tree": _TYPE_TREE, "commit": _TYPE_COMMIT, "tag": _TYPE_TAG}
+_BYTE_TO_TYPE = {v: k for k, v in _TYPE_TO_BYTE.items()}
+
+
+def _get_object_type_and_content(store: ObjectStore, hash_id: str) -> Optional[Tuple[str, bytes]]:
+    """Return (obj_type, raw_content) for a hash, or None."""
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        content = store.retrieve(hash_id, obj_type)
+        if content is not None:
+            return (obj_type, content)
+    return None
+
+
+def _bundle_objects(store: ObjectStore, hash_ids: Set[str]) -> bytes:
+    """Bundle objects into a single byte blob: count + [hash(32) type(1) len(4) zlib_payload]."""
+    entries = []
+    for h in sorted(hash_ids):
+        pair = _get_object_type_and_content(store, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        h_bin = bytes.fromhex(h) if len(h) == 64 else h.encode().ljust(32)[:32]
+        entries.append((h_bin, _TYPE_TO_BYTE.get(obj_type, _TYPE_BLOB), compressed))
+    parts = [struct.pack(">I", len(entries))]
+    for h_bin, type_byte, compressed in entries:
+        parts.append(h_bin)
+        parts.append(bytes([type_byte]))
+        parts.append(struct.pack(">I", len(compressed)))
+        parts.append(compressed)
+    return b"".join(parts)
+
+
+def _unbundle_objects(data: bytes, objects_dir: Path) -> int:
+    """Unbundle and write loose objects. Returns count written."""
+    if len(data) < 4:
+        return 0
+    count = struct.unpack(">I", data[:4])[0]
+    offset = 4
+    written = 0
+    for _ in range(count):
+        if offset + 32 + 1 + 4 > len(data):
+            break
+        h_bin = data[offset : offset + 32]
+        offset += 32
+        type_byte = data[offset]
+        offset += 1
+        comp_len = struct.unpack(">I", data[offset : offset + 4])[0]
+        offset += 4
+        if offset + comp_len > len(data):
+            break
+        compressed = data[offset : offset + comp_len]
+        offset += comp_len
+        obj_type = _BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            continue
+        null_idx = full.index(b"\0")
+        # Validate header
+        prefix = full[:null_idx].decode()
+        if " " not in prefix:
+            continue
+        name, size_str = prefix.split(" ", 1)
+        hash_hex = h_bin.hex() if len(h_bin) == 32 else h_bin.decode().strip()
+        if len(hash_hex) < 4:
+            continue
+        obj_path = objects_dir / obj_type / hash_hex[:2] / hash_hex[2:]
+        obj_path.parent.mkdir(parents=True, exist_ok=True)
+        obj_path.write_bytes(compressed)
+        written += 1
+    return written
+
+
+def _add_to_ipfs_gateway(bundle: bytes, gateway_url: str) -> Optional[str]:
+    """POST bundle to IPFS gateway /api/v0/add (multipart). Returns CID or None."""
+    boundary = "----agmem-boundary-" + str(abs(hash(bundle)))[:12]
+    body = (
+        b"--" + boundary.encode() + b"\r\n"
+        b'Content-Disposition: form-data; name="file"; filename="agmem-bundle.bin"\r\n'
+        b"Content-Type: application/octet-stream\r\n\r\n" + bundle + b"\r\n"
+        b"--" + boundary.encode() + b"--\r\n"
+    )
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/api/v0/add"
+        req = urllib.request.Request(url, data=body, method="POST")
+        req.add_header("Content-Type", "multipart/form-data; boundary=" + boundary)
+        req.add_header("Content-Length", str(len(body)))
+        with urllib.request.urlopen(req, timeout=120) as resp:
+            if resp.status != 200:
+                return None
+            data = json.loads(resp.read().decode())
+            return data.get("Hash") or data.get("Name")
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/api/v0/add"
+            r = requests.post(
+                url,
+                files={"file": ("agmem-bundle.bin", bundle, "application/octet-stream")},
+                timeout=120,
+            )
+            if r.status_code != 200:
+                return None
+            return r.json().get("Hash") or r.json().get("Name")
+        except Exception:
+            return None
+
 
 def push_to_ipfs(
     objects_dir: Path,
     branch: str,
     commit_hash: str,
     gateway_url: str = "https://ipfs.io",
+    store: Optional[ObjectStore] = None,
 ) -> Optional[str]:
-    """
-    return
+    """
+    Push branch objects to IPFS and return root CID.
+    Uses gateway POST /api/v0/add (multipart).
+    """
+    if store is None:
+        store = ObjectStore(objects_dir)
+    try:
+        reachable = _collect_objects_from_commit(store, commit_hash)
+    except Exception:
+        return None
+    if not reachable:
+        return None
+    bundle = _bundle_objects(store, reachable)
+    return _add_to_ipfs_gateway(bundle, gateway_url)
 
 
 def pull_from_ipfs(
@@ -27,8 +163,32 @@ def pull_from_ipfs(
     cid: str,
     gateway_url: str = "https://ipfs.io",
 ) -> bool:
-    """
-
+    """
+    Pull objects by CID from IPFS into objects_dir (loose objects).
+    Uses GET gateway_url/ipfs/<cid>.
+    """
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/ipfs/" + cid
+        req = urllib.request.Request(url, method="GET")
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            if resp.status != 200:
+                return False
+            data = resp.read()
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/ipfs/" + cid
+            r = requests.get(url, timeout=60)
+            if r.status_code != 200:
+                return False
+            data = r.content
+        except Exception:
+            return False
+    written = _unbundle_objects(data, objects_dir)
+    return written > 0
 
 
 def parse_ipfs_url(url: str) -> Optional[str]:
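A round-trip usage sketch for the new push/pull path. The commit hash and directories are illustrative, and `pull_from_ipfs` is assumed to take the destination objects directory as its first argument (consistent with its body above). Note that `/api/v0/add` is the HTTP RPC of a local IPFS node (default port 5001); the default `https://ipfs.io` gateway is read-only and will generally reject it:

```python
# Illustrative only: paths, commit hash, and the local node address are placeholders.
from pathlib import Path

from memvcs.core.ipfs_remote import push_to_ipfs, pull_from_ipfs

objects_dir = Path("/path/to/repo/.mem/objects")  # hypothetical repository layout
cid = push_to_ipfs(objects_dir, "main", "<commit-hash>", gateway_url="http://127.0.0.1:5001")
if cid:
    ok = pull_from_ipfs(Path("/path/to/mirror/objects"), cid, gateway_url="http://127.0.0.1:5001")
    print(cid, ok)
```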
memvcs/core/knowledge_graph.py
CHANGED
@@ -84,7 +84,14 @@ class KnowledgeGraphBuilder:
     1. Wikilinks: [[filename]] references
     2. Semantic similarity: Using embeddings
     3. Shared tags: Files with common tags
-    4. Co-occurrence:
+    4. Co-occurrence: Files that mention the same entity (e.g. same section/session)
+    5. Causal: Phrases like "caused by", "because of" linking concepts (when derivable)
+    6. Entity: Person/place/thing links (simple keyword or pattern)
+
+    Incremental updates: To update when new files are added without full rebuild,
+    filter the file list to new/changed paths, run build_graph logic for that subset,
+    and merge new nodes/edges into the existing graph (or re-run build_graph; cost is
+    linear in file count).
     """
 
     # Pattern for wikilinks: [[target]] or [[target|display text]]
@@ -262,7 +269,22 @@ class KnowledgeGraphBuilder:
         except Exception:
             pass  # Skip similarity if vector store fails
 
+        # Add co-occurrence edges (files sharing entities)
+        try:
+            edges.extend(self._build_cooccurrence_edges(file_paths, file_contents))
+        except Exception:
+            pass
+
+        # Add causal edges (phrases like "caused by", "because of" linking to another file)
+        try:
+            edges.extend(self._build_causal_edges(file_contents))
+        except Exception:
+            pass
+
         # Build metadata
+        edge_type_counts = defaultdict(int)
+        for e in edges:
+            edge_type_counts[e.edge_type] += 1
         metadata = {
             "total_nodes": len(nodes),
             "total_edges": len(edges),
@@ -274,15 +296,66 @@ class KnowledgeGraphBuilder:
                 1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
             ),
             },
-            "edge_types":
-                "reference": sum(1 for e in edges if e.edge_type == "reference"),
-                "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
-                "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
-            },
+            "edge_types": dict(edge_type_counts),
         }
 
         return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
 
+    def _extract_entities_simple(self, content: str) -> Set[str]:
+        """Extract simple entity tokens (capitalized words, key phrases) for co-occurrence."""
+        entities = set()
+        for word in re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", content):
+            if len(word) > 2:
+                entities.add(word.lower())
+        for phrase in ["user", "project", "agent", "memory", "preference", "workflow"]:
+            if phrase in content.lower():
+                entities.add(phrase)
+        return entities
+
+    def _build_cooccurrence_edges(
+        self, file_paths: List[str], file_contents: Dict[str, str]
+    ) -> List[GraphEdge]:
+        """Build edges between files that share at least one entity (co-occurrence)."""
+        file_entities: Dict[str, Set[str]] = {}
+        for path, content in file_contents.items():
+            file_entities[path] = self._extract_entities_simple(content)
+        edges = []
+        paths_list = list(file_paths)
+        for i, path1 in enumerate(paths_list):
+            for path2 in paths_list[i + 1 :]:
+                common = file_entities.get(path1, set()) & file_entities.get(path2, set())
+                if common:
+                    w = min(1.0, 0.3 + 0.1 * len(common))
+                    edge = GraphEdge(
+                        source=path1, target=path2, edge_type="co_occurrence", weight=w
+                    )
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(path1, path2, type="co_occurrence", weight=w)
+        return edges
+
+    def _build_causal_edges(self, file_contents: Dict[str, str]) -> List[GraphEdge]:
+        """Build edges when content has causal phrases linking to another file (e.g. caused by [[X]])."""
+        causal_phrases = re.compile(
+            r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
+            re.IGNORECASE,
+        )
+        edges = []
+        for source_path, content in file_contents.items():
+            for m in causal_phrases.finditer(content):
+                target = m.group(1) or m.group(2)
+                if not target:
+                    continue
+                target_path = self._normalize_link_target(target.strip(), source_path)
+                if target_path and target_path in file_contents and target_path != source_path:
+                    edge = GraphEdge(
+                        source=source_path, target=target_path, edge_type="causal", weight=0.7
+                    )
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(source_path, target_path, type="causal", weight=0.7)
+        return edges
+
     def _build_similarity_edges(
         self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
     ) -> List[GraphEdge]: