agmem 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff compares two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/METADATA +24 -18
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/RECORD +25 -24
- memvcs/commands/daemon.py +20 -3
- memvcs/commands/distill.py +10 -2
- memvcs/commands/federated.py +7 -1
- memvcs/commands/garden.py +10 -2
- memvcs/commands/gc.py +16 -1
- memvcs/commands/prove.py +2 -2
- memvcs/commands/timeline.py +27 -0
- memvcs/commands/when.py +27 -0
- memvcs/core/compression_pipeline.py +157 -0
- memvcs/core/crypto_verify.py +12 -1
- memvcs/core/distiller.py +22 -4
- memvcs/core/federated.py +70 -9
- memvcs/core/gardener.py +24 -5
- memvcs/core/ipfs_remote.py +169 -8
- memvcs/core/knowledge_graph.py +77 -6
- memvcs/core/objects.py +31 -21
- memvcs/core/pack.py +187 -1
- memvcs/core/remote.py +191 -3
- memvcs/core/zk_proofs.py +143 -11
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/core/crypto_verify.py
CHANGED
@@ -12,7 +12,7 @@ import os
 from pathlib import Path
 from typing import Optional, List, Tuple, Any, Dict
 
-from .objects import ObjectStore, Tree, Commit
+from .objects import ObjectStore, Tree, Commit, Blob
 
 # Ed25519 via cryptography (optional)
 try:
@@ -239,6 +239,17 @@ def verify_commit(
     stored_sig = (commit.metadata or {}).get("signature")
     if not stored_root:
         return (False, "commit has no merkle_root (unverified)")
+
+    # Verify that blob objects can be loaded successfully (detects tampering in compressed/encrypted content)
+    blob_hashes = _collect_blob_hashes_from_tree(store, commit.tree)
+    for blob_hash in blob_hashes:
+        try:
+            blob = Blob.load(store, blob_hash)
+            if blob is None:
+                return (False, f"blob {blob_hash[:8]} corrupted or missing")
+        except Exception as e:
+            return (False, f"merkle_root mismatch (commit tampered)")
+
     computed_root = build_merkle_root_for_commit(store, commit_hash)
     if not computed_root:
         return (False, "could not build Merkle tree (missing tree/blobs)")
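
The added check walks every blob reachable from the commit's tree and refuses verification if any blob fails to load. A minimal, self-contained sketch of that pattern; `check_blobs_loadable` and `load_blob` are hypothetical names for illustration, not agmem's API:

```python
from typing import Callable, Iterable, Optional, Tuple

def check_blobs_loadable(
    blob_hashes: Iterable[str],
    load_blob: Callable[[str], Optional[bytes]],
) -> Tuple[bool, str]:
    """Return (ok, reason); fail fast on the first blob that is missing or unreadable."""
    for blob_hash in blob_hashes:
        try:
            if load_blob(blob_hash) is None:
                return (False, f"blob {blob_hash[:8]} corrupted or missing")
        except Exception:
            # A decode/decrypt error while loading is treated as tampering.
            return (False, "commit tampered (blob failed to load)")
    return (True, "all blobs loadable")

# Usage with an in-memory dict standing in for the object store:
store = {"abc123": b"hello"}
ok, reason = check_blobs_loadable(store.keys(), store.get)
```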
memvcs/core/distiller.py
CHANGED
@@ -35,6 +35,9 @@ class DistillerConfig:
     llm_provider: Optional[str] = None
     llm_model: Optional[str] = None
     create_safety_branch: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -160,13 +163,18 @@ class Distiller:
         except ValueError:
             out_path = self.target_dir / f"consolidated-{ts}.md"
 
+        confidence_score = self.config.extraction_confidence_threshold
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            confidence_score = add_noise(confidence_score, 0.1, self.config.dp_epsilon, self.config.dp_delta)
+            confidence_score = max(0.0, min(1.0, confidence_score))
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "distiller",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "consolidated"],
-            "confidence_score":
+            "confidence_score": confidence_score,
         }
         body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
         if YAML_AVAILABLE:
@@ -266,11 +274,21 @@ class Distiller:
         except Exception:
             pass
 
+        clusters_processed = len(clusters)
+        facts_extracted = facts_count
+        episodes_archived = archived
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            sensitivity = 1.0
+            clusters_processed = max(0, int(round(add_noise(float(clusters_processed), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            facts_extracted = max(0, int(round(add_noise(float(facts_extracted), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+
         return DistillerResult(
             success=True,
-            clusters_processed=
-            facts_extracted=
-            episodes_archived=
+            clusters_processed=clusters_processed,
+            facts_extracted=facts_extracted,
+            episodes_archived=episodes_archived,
            branch_created=branch_name,
            commit_hash=commit_hash,
            message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
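
The distiller now optionally perturbs the counts and confidence score it reports. agmem's own `add_noise` lives in `memvcs.core.privacy_budget` and is not shown in this diff; the sketch below assumes a Gaussian-mechanism-style implementation with the same `(value, sensitivity, epsilon, delta)` signature, purely to illustrate the round-and-clamp pattern used above.

```python
import math
import random

def add_noise(value: float, sensitivity: float, epsilon: float, delta: float) -> float:
    """Hypothetical (epsilon, delta)-DP Gaussian mechanism matching the signature in the diff."""
    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
    return value + random.gauss(0.0, sigma)

def noisy_count(count: int, epsilon: float, delta: float) -> int:
    """Round and clamp, mirroring the max(0, int(round(...))) pattern in the diff."""
    return max(0, int(round(add_noise(float(count), 1.0, epsilon, delta))))

print(noisy_count(42, epsilon=0.5, delta=1e-5))
```

Rounding and clamping keeps the reported counts plausible (non-negative integers) while the noise hides the exact number of clusters, facts, and archived episodes.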
memvcs/core/federated.py
CHANGED
@@ -5,6 +5,7 @@ Agents share model updates or aggregated summaries instead of raw episodic logs.
 Optional coordinator URL; optional differential privacy (Tier 3).
 """
 
+import hashlib
 import json
 from pathlib import Path
 from typing import Optional, List, Dict, Any
@@ -21,30 +22,90 @@ def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
     url = fed.get("coordinator_url")
     if not url:
         return None
-
+    out = {
         "coordinator_url": url.rstrip("/"),
         "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
     }
+    dp = fed.get("differential_privacy") or config.get("differential_privacy") or {}
+    if dp.get("enabled"):
+        out["use_dp"] = True
+        out["dp_epsilon"] = float(dp.get("epsilon", 0.1))
+        out["dp_delta"] = float(dp.get("delta", 1e-5))
+    else:
+        out["use_dp"] = False
+    return out
 
 
-def
+def _normalize_for_hash(text: str) -> str:
+    """Normalize text for hashing (no raw content sent)."""
+    return " ".join(text.strip().split())
+
+
+def _extract_topic_from_md(path: Path, content: str) -> str:
+    """Extract topic from frontmatter tags or first heading."""
+    if content.startswith("---"):
+        end = content.find("---", 3)
+        if end > 0:
+            try:
+                import yaml
+                fm = yaml.safe_load(content[3:end])
+                if isinstance(fm, dict):
+                    tags = fm.get("tags", [])
+                    if tags:
+                        return str(tags[0])[:50]
+            except (ImportError, Exception):
+                pass
+    first_line = content.strip().split("\n")[0] if content.strip() else ""
+    if first_line.startswith("#"):
+        return first_line.lstrip("#").strip()[:50] or "untitled"
+    return "untitled"
+
+
+def produce_local_summary(
+    repo_root: Path, memory_types: List[str], use_dp: bool = False, dp_epsilon: float = 0.1, dp_delta: float = 1e-5
+) -> Dict[str, Any]:
     """
     Produce a local summary from episodic/semantic data (no raw content).
-    Returns dict
+    Returns dict with topic counts and fact hashes suitable for coordinator.
     """
     current_dir = repo_root / "current"
-    summary = {"memory_types": memory_types, "topics": {}, "fact_count": 0}
+    summary = {"memory_types": memory_types, "topics": {}, "topic_hashes": {}, "fact_count": 0}
+    all_fact_hashes: List[str] = []
+
     for mtype in memory_types:
         d = current_dir / mtype
         if not d.exists():
+            summary["topics"][mtype] = 0
+            summary["topic_hashes"][mtype] = []
             continue
-
+        topic_to_count: Dict[str, int] = {}
+        topic_to_hashes: Dict[str, List[str]] = {}
         for f in d.rglob("*.md"):
-            if f.is_file():
-
-
+            if not f.is_file():
+                continue
+            try:
+                content = f.read_text(encoding="utf-8", errors="replace")
+            except Exception:
+                continue
+            normalized = _normalize_for_hash(content)
+            if normalized:
+                h = hashlib.sha256(normalized.encode()).hexdigest()
+                all_fact_hashes.append(h)
+                topic = _extract_topic_from_md(f, content)
+                topic_to_count[topic] = topic_to_count.get(topic, 0) + 1
+                topic_to_hashes.setdefault(topic, []).append(h)
+        summary["topics"][mtype] = sum(topic_to_count.values())
+        summary["topic_hashes"][mtype] = list(topic_to_hashes.keys())
         if mtype == "semantic":
-            summary["fact_count"] =
+            summary["fact_count"] = len(all_fact_hashes)
+
+    if use_dp and dp_epsilon and dp_delta:
+        from .privacy_budget import add_noise
+        for mtype in summary["topics"]:
+            raw = summary["topics"][mtype]
+            summary["topics"][mtype] = max(0, int(round(add_noise(float(raw), 1.0, dp_epsilon, dp_delta))))
+        summary["fact_count"] = max(0, int(round(add_noise(float(summary["fact_count"]), 1.0, dp_epsilon, dp_delta))))
+
     return summary
 
 
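
`produce_local_summary` ships only counts and content hashes to the coordinator, never raw markdown. A self-contained sketch of the normalize-then-hash step, independent of agmem's module layout (`summarize_dir` is a hypothetical helper for illustration):

```python
import hashlib
from pathlib import Path

def normalize_for_hash(text: str) -> str:
    # Collapse whitespace so formatting changes don't alter the hash.
    return " ".join(text.strip().split())

def summarize_dir(memory_dir: Path) -> dict:
    """Collect content hashes and a count; no raw text leaves the machine."""
    hashes = []
    for f in memory_dir.rglob("*.md"):
        if not f.is_file():
            continue
        normalized = normalize_for_hash(f.read_text(encoding="utf-8", errors="replace"))
        if normalized:
            hashes.append(hashlib.sha256(normalized.encode()).hexdigest())
    return {"fact_count": len(hashes), "fact_hashes": hashes}
```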
memvcs/core/gardener.py
CHANGED
@@ -43,6 +43,9 @@ class GardenerConfig:
     llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
     llm_model: Optional[str] = None
     auto_commit: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -351,14 +354,20 @@ class Gardener:
         except ValueError:
             insight_path = self.semantic_dir / f"insight-{timestamp}.md"
 
-        # Generate frontmatter
+        # Generate frontmatter (optionally noised for differential privacy)
+        source_episodes = len(cluster.episodes)
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            source_episodes = max(0, int(round(add_noise(
+                float(source_episodes), 1.0, self.config.dp_epsilon, self.config.dp_delta
+            ))))
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "gardener",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "insight"],
-            "source_episodes":
+            "source_episodes": source_episodes,
         }
 
         # Write file
@@ -487,11 +496,21 @@ class Gardener:
         except Exception as e:
             print(f"Warning: Auto-commit failed: {e}")
 
+        clusters_found = len(clusters)
+        insights_generated = insights_written
+        episodes_archived = archived_count
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            sensitivity = 1.0
+            clusters_found = max(0, int(round(add_noise(float(clusters_found), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            insights_generated = max(0, int(round(add_noise(float(insights_generated), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+
         return GardenerResult(
             success=True,
-            clusters_found=
-            insights_generated=
-            episodes_archived=
+            clusters_found=clusters_found,
+            insights_generated=insights_generated,
+            episodes_archived=episodes_archived,
             commit_hash=commit_hash,
             message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
         )
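
The gardener writes the (optionally noised) episode count into each insight's frontmatter. A rough, self-contained sketch of that output shape; `render_insight` is hypothetical and uses a simplified frontmatter serializer rather than agmem's YAML path, so treat it only as an illustration of the fields shown in the diff.

```python
from datetime import datetime, timezone

def render_insight(topic: str, tags: list, source_episodes: int) -> str:
    """Render frontmatter plus body roughly in the shape the diff shows (illustrative only)."""
    frontmatter = {
        "schema_version": "1.0",
        "last_updated": datetime.now(timezone.utc).isoformat(),
        "source_agent_id": "gardener",
        "memory_type": "semantic",
        "tags": tags + ["auto-generated", "insight"],
        "source_episodes": source_episodes,  # possibly a DP-noised count
    }
    fm_lines = "\n".join(f"{k}: {v}" for k, v in frontmatter.items())
    return f"---\n{fm_lines}\n---\n\n# Insight: {topic}\n"

print(render_insight("build failures", ["ci"], source_episodes=7))
```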
memvcs/core/ipfs_remote.py
CHANGED
@@ -1,25 +1,162 @@
 """
-IPFS remote for agmem
+IPFS remote for agmem.
 
-Push/pull via CIDs
-
+Push/pull via CIDs using HTTP gateway (POST /api/v0/add, GET /ipfs/<cid>).
+Optional ipfshttpclient for local daemon.
 """
 
+import json
+import struct
+import zlib
 from pathlib import Path
-from typing import Optional, Set
+from typing import Optional, Set, Dict, Tuple
 
 from .objects import ObjectStore
 from .remote import _collect_objects_from_commit
 
+# Type byte for bundle (same as pack)
+_TYPE_BLOB = 1
+_TYPE_TREE = 2
+_TYPE_COMMIT = 3
+_TYPE_TAG = 4
+_TYPE_TO_BYTE = {"blob": _TYPE_BLOB, "tree": _TYPE_TREE, "commit": _TYPE_COMMIT, "tag": _TYPE_TAG}
+_BYTE_TO_TYPE = {v: k for k, v in _TYPE_TO_BYTE.items()}
+
+
+def _get_object_type_and_content(store: ObjectStore, hash_id: str) -> Optional[Tuple[str, bytes]]:
+    """Return (obj_type, raw_content) for a hash, or None."""
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        content = store.retrieve(hash_id, obj_type)
+        if content is not None:
+            return (obj_type, content)
+    return None
+
+
+def _bundle_objects(store: ObjectStore, hash_ids: Set[str]) -> bytes:
+    """Bundle objects into a single byte blob: count + [hash(32) type(1) len(4) zlib_payload]."""
+    entries = []
+    for h in sorted(hash_ids):
+        pair = _get_object_type_and_content(store, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        h_bin = bytes.fromhex(h) if len(h) == 64 else h.encode().ljust(32)[:32]
+        entries.append((h_bin, _TYPE_TO_BYTE.get(obj_type, _TYPE_BLOB), compressed))
+    parts = [struct.pack(">I", len(entries))]
+    for h_bin, type_byte, compressed in entries:
+        parts.append(h_bin)
+        parts.append(bytes([type_byte]))
+        parts.append(struct.pack(">I", len(compressed)))
+        parts.append(compressed)
+    return b"".join(parts)
+
+
+def _unbundle_objects(data: bytes, objects_dir: Path) -> int:
+    """Unbundle and write loose objects. Returns count written."""
+    if len(data) < 4:
+        return 0
+    count = struct.unpack(">I", data[:4])[0]
+    offset = 4
+    written = 0
+    for _ in range(count):
+        if offset + 32 + 1 + 4 > len(data):
+            break
+        h_bin = data[offset : offset + 32]
+        offset += 32
+        type_byte = data[offset]
+        offset += 1
+        comp_len = struct.unpack(">I", data[offset : offset + 4])[0]
+        offset += 4
+        if offset + comp_len > len(data):
+            break
+        compressed = data[offset : offset + comp_len]
+        offset += comp_len
+        obj_type = _BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            continue
+        null_idx = full.index(b"\0")
+        # Validate header
+        prefix = full[:null_idx].decode()
+        if " " not in prefix:
+            continue
+        name, size_str = prefix.split(" ", 1)
+        hash_hex = h_bin.hex() if len(h_bin) == 32 else h_bin.decode().strip()
+        if len(hash_hex) < 4:
+            continue
+        obj_path = objects_dir / obj_type / hash_hex[:2] / hash_hex[2:]
+        obj_path.parent.mkdir(parents=True, exist_ok=True)
+        obj_path.write_bytes(compressed)
+        written += 1
+    return written
+
+
+def _add_to_ipfs_gateway(bundle: bytes, gateway_url: str) -> Optional[str]:
+    """POST bundle to IPFS gateway /api/v0/add (multipart). Returns CID or None."""
+    boundary = "----agmem-boundary-" + str(abs(hash(bundle)))[:12]
+    body = (
+        b"--" + boundary.encode() + b"\r\n"
+        b'Content-Disposition: form-data; name="file"; filename="agmem-bundle.bin"\r\n'
+        b"Content-Type: application/octet-stream\r\n\r\n"
+        + bundle + b"\r\n"
+        b"--" + boundary.encode() + b"--\r\n"
+    )
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/api/v0/add"
+        req = urllib.request.Request(url, data=body, method="POST")
+        req.add_header("Content-Type", "multipart/form-data; boundary=" + boundary)
+        req.add_header("Content-Length", str(len(body)))
+        with urllib.request.urlopen(req, timeout=120) as resp:
+            if resp.status != 200:
+                return None
+            data = json.loads(resp.read().decode())
+            return data.get("Hash") or data.get("Name")
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/api/v0/add"
+            r = requests.post(
+                url,
+                files={"file": ("agmem-bundle.bin", bundle, "application/octet-stream")},
+                timeout=120,
+            )
+            if r.status_code != 200:
+                return None
+            return r.json().get("Hash") or r.json().get("Name")
+        except Exception:
+            return None
+
 
 def push_to_ipfs(
     objects_dir: Path,
     branch: str,
     commit_hash: str,
     gateway_url: str = "https://ipfs.io",
+    store: Optional[ObjectStore] = None,
 ) -> Optional[str]:
-    """
-    return
+    """
+    Push branch objects to IPFS and return root CID.
+    Uses gateway POST /api/v0/add (multipart).
+    """
+    if store is None:
+        store = ObjectStore(objects_dir)
+    try:
+        reachable = _collect_objects_from_commit(store, commit_hash)
+    except Exception:
+        return None
+    if not reachable:
+        return None
+    bundle = _bundle_objects(store, reachable)
+    return _add_to_ipfs_gateway(bundle, gateway_url)
 
 
 def pull_from_ipfs(
@@ -27,8 +164,32 @@ def pull_from_ipfs(
     cid: str,
     gateway_url: str = "https://ipfs.io",
 ) -> bool:
-    """
-
+    """
+    Pull objects by CID from IPFS into objects_dir (loose objects).
+    Uses GET gateway_url/ipfs/<cid>.
+    """
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/ipfs/" + cid
+        req = urllib.request.Request(url, method="GET")
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            if resp.status != 200:
+                return False
+            data = resp.read()
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/ipfs/" + cid
+            r = requests.get(url, timeout=60)
+            if r.status_code != 200:
+                return False
+            data = r.content
+        except Exception:
+            return False
+    written = _unbundle_objects(data, objects_dir)
+    return written > 0
 
 
 def parse_ipfs_url(url: str) -> Optional[str]:
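
The bundle format is simple enough to exercise in isolation: a 4-byte big-endian entry count, then per object a 32-byte hash, one type byte, a 4-byte payload length, and the zlib-compressed "type size\0content" payload. A minimal round-trip sketch (no IPFS involved; `bundle`/`unbundle` are illustrative stand-ins for the private helpers above):

```python
import hashlib
import struct
import zlib

TYPE_BYTES = {"blob": 1, "tree": 2, "commit": 3, "tag": 4}

def bundle(objects: dict) -> bytes:
    """objects maps a hex sha256 hash -> (obj_type, content bytes)."""
    parts = [struct.pack(">I", len(objects))]
    for h, (obj_type, content) in sorted(objects.items()):
        payload = zlib.compress(f"{obj_type} {len(content)}\0".encode() + content)
        parts += [
            bytes.fromhex(h),                 # 32-byte object hash
            bytes([TYPE_BYTES[obj_type]]),    # 1 type byte
            struct.pack(">I", len(payload)),  # 4-byte payload length
            payload,                          # zlib("type size\0" + content)
        ]
    return b"".join(parts)

def unbundle(data: bytes) -> dict:
    """Return hex hash -> raw content for every entry in the bundle."""
    count, offset, out = struct.unpack(">I", data[:4])[0], 4, {}
    for _ in range(count):
        h = data[offset:offset + 32].hex()
        length = struct.unpack(">I", data[offset + 33:offset + 37])[0]
        payload = zlib.decompress(data[offset + 37:offset + 37 + length])
        out[h] = payload.split(b"\0", 1)[1]   # drop the "type size\0" header
        offset += 37 + length
    return out

content = b"hello memory"
h = hashlib.sha256(content).hexdigest()
assert unbundle(bundle({h: ("blob", content)}))[h] == content
```

Because each payload keeps the same zlib-compressed header+content framing as a loose object, the unbundler can write entries straight into the objects directory without recompressing.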
memvcs/core/knowledge_graph.py
CHANGED
@@ -84,7 +84,14 @@ class KnowledgeGraphBuilder:
     1. Wikilinks: [[filename]] references
     2. Semantic similarity: Using embeddings
     3. Shared tags: Files with common tags
-    4. Co-occurrence:
+    4. Co-occurrence: Files that mention the same entity (e.g. same section/session)
+    5. Causal: Phrases like "caused by", "because of" linking concepts (when derivable)
+    6. Entity: Person/place/thing links (simple keyword or pattern)
+
+    Incremental updates: To update when new files are added without full rebuild,
+    filter the file list to new/changed paths, run build_graph logic for that subset,
+    and merge new nodes/edges into the existing graph (or re-run build_graph; cost is
+    linear in file count).
     """
 
     # Pattern for wikilinks: [[target]] or [[target|display text]]
@@ -262,7 +269,22 @@ class KnowledgeGraphBuilder:
         except Exception:
             pass  # Skip similarity if vector store fails
 
+        # Add co-occurrence edges (files sharing entities)
+        try:
+            edges.extend(self._build_cooccurrence_edges(file_paths, file_contents))
+        except Exception:
+            pass
+
+        # Add causal edges (phrases like "caused by", "because of" linking to another file)
+        try:
+            edges.extend(self._build_causal_edges(file_contents))
+        except Exception:
+            pass
+
         # Build metadata
+        edge_type_counts = defaultdict(int)
+        for e in edges:
+            edge_type_counts[e.edge_type] += 1
         metadata = {
             "total_nodes": len(nodes),
             "total_edges": len(edges),
@@ -274,15 +296,64 @@ class KnowledgeGraphBuilder:
                     1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
                 ),
             },
-            "edge_types":
-                "reference": sum(1 for e in edges if e.edge_type == "reference"),
-                "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
-                "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
-            },
+            "edge_types": dict(edge_type_counts),
         }
 
         return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
 
+    def _extract_entities_simple(self, content: str) -> Set[str]:
+        """Extract simple entity tokens (capitalized words, key phrases) for co-occurrence."""
+        entities = set()
+        for word in re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", content):
+            if len(word) > 2:
+                entities.add(word.lower())
+        for phrase in ["user", "project", "agent", "memory", "preference", "workflow"]:
+            if phrase in content.lower():
+                entities.add(phrase)
+        return entities
+
+    def _build_cooccurrence_edges(
+        self, file_paths: List[str], file_contents: Dict[str, str]
+    ) -> List[GraphEdge]:
+        """Build edges between files that share at least one entity (co-occurrence)."""
+        file_entities: Dict[str, Set[str]] = {}
+        for path, content in file_contents.items():
+            file_entities[path] = self._extract_entities_simple(content)
+        edges = []
+        paths_list = list(file_paths)
+        for i, path1 in enumerate(paths_list):
+            for path2 in paths_list[i + 1 :]:
+                common = file_entities.get(path1, set()) & file_entities.get(path2, set())
+                if common:
+                    w = min(1.0, 0.3 + 0.1 * len(common))
+                    edge = GraphEdge(source=path1, target=path2, edge_type="co_occurrence", weight=w)
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(path1, path2, type="co_occurrence", weight=w)
+        return edges
+
+    def _build_causal_edges(self, file_contents: Dict[str, str]) -> List[GraphEdge]:
+        """Build edges when content has causal phrases linking to another file (e.g. caused by [[X]])."""
+        causal_phrases = re.compile(
+            r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
+            re.IGNORECASE,
+        )
+        edges = []
+        for source_path, content in file_contents.items():
+            for m in causal_phrases.finditer(content):
+                target = m.group(1) or m.group(2)
+                if not target:
+                    continue
+                target_path = self._normalize_link_target(target.strip(), source_path)
+                if target_path and target_path in file_contents and target_path != source_path:
+                    edge = GraphEdge(
+                        source=source_path, target=target_path, edge_type="causal", weight=0.7
+                    )
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(source_path, target_path, type="causal", weight=0.7)
        return edges
 
     def _build_similarity_edges(
         self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
     ) -> List[GraphEdge]:
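
The causal-edge pass keys off phrases like "caused by [[other-note]]". A quick standalone check of the regex pattern used in the diff:

```python
import re

CAUSAL = re.compile(
    r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
    re.IGNORECASE,
)

text = "The outage was caused by [[deploy-2024-01]] and later led to rollbacks."
for m in CAUSAL.finditer(text):
    target = m.group(1) or m.group(2)
    print(target)  # -> "deploy-2024-01", then "rollbacks"
```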
memvcs/core/objects.py
CHANGED
@@ -83,7 +83,7 @@ class ObjectStore:
 
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
-        Retrieve content by hash ID.
+        Retrieve content by hash ID (loose object or pack).
 
         Args:
             hash_id: SHA-256 hash of the object
@@ -94,31 +94,41 @@
         """
         obj_path = self._get_object_path(hash_id, obj_type)
 
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if obj_path.exists():
+            raw = obj_path.read_bytes()
+            # Optionally decrypt (iv+tag minimum 12+16 bytes)
+            if self._encryptor and len(raw) >= 12 + 16:
+                try:
+                    raw = self._encryptor.decrypt_payload(raw)
+                except Exception:
+                    pass  # legacy plain compressed
+            full_content = zlib.decompress(raw)
+            null_idx = full_content.index(b"\0")
+            content = full_content[null_idx + 1 :]
+            return content
+
+        # Try pack file when loose object missing
+        try:
+            from .pack import retrieve_from_pack
+            result = retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type)
+            if result is not None:
+                return result[1]
+        except Exception:
+            pass
+        return None
 
     def exists(self, hash_id: str, obj_type: str) -> bool:
-        """Check if an object exists. Returns False for invalid hash (no raise)."""
+        """Check if an object exists (loose or pack). Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
-
+        if obj_path.exists():
+            return True
+        try:
+            from .pack import retrieve_from_pack
+            return retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type) is not None
+        except Exception:
+            return False
 
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""