agmem 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ import os
 from pathlib import Path
 from typing import Optional, List, Tuple, Any, Dict
 
-from .objects import ObjectStore, Tree, Commit
+from .objects import ObjectStore, Tree, Commit, Blob
 
 # Ed25519 via cryptography (optional)
 try:
@@ -239,6 +239,17 @@ def verify_commit(
     stored_sig = (commit.metadata or {}).get("signature")
     if not stored_root:
         return (False, "commit has no merkle_root (unverified)")
+
+    # Verify that blob objects can be loaded successfully (detects tampering in compressed/encrypted content)
+    blob_hashes = _collect_blob_hashes_from_tree(store, commit.tree)
+    for blob_hash in blob_hashes:
+        try:
+            blob = Blob.load(store, blob_hash)
+            if blob is None:
+                return (False, f"blob {blob_hash[:8]} corrupted or missing")
+        except Exception as e:
+            return (False, f"merkle_root mismatch (commit tampered)")
+
     computed_root = build_merkle_root_for_commit(store, commit_hash)
     if not computed_root:
         return (False, "could not build Merkle tree (missing tree/blobs)")
memvcs/core/distiller.py CHANGED
@@ -35,6 +35,9 @@ class DistillerConfig:
     llm_provider: Optional[str] = None
     llm_model: Optional[str] = None
     create_safety_branch: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -160,13 +163,18 @@ class Distiller:
         except ValueError:
             out_path = self.target_dir / f"consolidated-{ts}.md"
 
+        confidence_score = self.config.extraction_confidence_threshold
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            confidence_score = add_noise(confidence_score, 0.1, self.config.dp_epsilon, self.config.dp_delta)
+            confidence_score = max(0.0, min(1.0, confidence_score))
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "distiller",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "consolidated"],
-            "confidence_score": self.config.extraction_confidence_threshold,
+            "confidence_score": confidence_score,
         }
         body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
         if YAML_AVAILABLE:
@@ -266,11 +274,21 @@ class Distiller:
         except Exception:
             pass
 
+        clusters_processed = len(clusters)
+        facts_extracted = facts_count
+        episodes_archived = archived
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            sensitivity = 1.0
+            clusters_processed = max(0, int(round(add_noise(float(clusters_processed), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            facts_extracted = max(0, int(round(add_noise(float(facts_extracted), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+
         return DistillerResult(
             success=True,
-            clusters_processed=len(clusters),
-            facts_extracted=facts_count,
-            episodes_archived=archived,
+            clusters_processed=clusters_processed,
+            facts_extracted=facts_extracted,
+            episodes_archived=episodes_archived,
             branch_created=branch_name,
             commit_hash=commit_hash,
             message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
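The DP branches above all call add_noise(value, sensitivity, epsilon, delta) from memvcs/core/privacy_budget, whose implementation is not shown in this diff. As a rough sketch only, a helper with that signature typically implements the (epsilon, delta) Gaussian mechanism, along these lines (illustrative, not the package's actual code):

import math
import random

def add_noise(value: float, sensitivity: float, epsilon: float, delta: float) -> float:
    # Gaussian mechanism: noise scale calibrated to (epsilon, delta) and the query sensitivity
    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
    return value + random.gauss(0.0, sigma)

The callers then round, clamp to non-negative integers, and (for the confidence score) clip to [0, 1], so the reported values stay plausible after noising.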
memvcs/core/federated.py CHANGED
@@ -5,6 +5,7 @@ Agents share model updates or aggregated summaries instead of raw episodic logs.
 Optional coordinator URL; optional differential privacy (Tier 3).
 """
 
+import hashlib
 import json
 from pathlib import Path
 from typing import Optional, List, Dict, Any
@@ -21,30 +22,90 @@ def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
     url = fed.get("coordinator_url")
     if not url:
         return None
-    return {
+    out = {
         "coordinator_url": url.rstrip("/"),
         "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
     }
+    dp = fed.get("differential_privacy") or config.get("differential_privacy") or {}
+    if dp.get("enabled"):
+        out["use_dp"] = True
+        out["dp_epsilon"] = float(dp.get("epsilon", 0.1))
+        out["dp_delta"] = float(dp.get("delta", 1e-5))
+    else:
+        out["use_dp"] = False
+    return out
 
 
-def produce_local_summary(repo_root: Path, memory_types: List[str]) -> Dict[str, Any]:
+def _normalize_for_hash(text: str) -> str:
+    """Normalize text for hashing (no raw content sent)."""
+    return " ".join(text.strip().split())
+
+
+def _extract_topic_from_md(path: Path, content: str) -> str:
+    """Extract topic from frontmatter tags or first heading."""
+    if content.startswith("---"):
+        end = content.find("---", 3)
+        if end > 0:
+            try:
+                import yaml
+                fm = yaml.safe_load(content[3:end])
+                if isinstance(fm, dict):
+                    tags = fm.get("tags", [])
+                    if tags:
+                        return str(tags[0])[:50]
+            except (ImportError, Exception):
+                pass
+    first_line = content.strip().split("\n")[0] if content.strip() else ""
+    if first_line.startswith("#"):
+        return first_line.lstrip("#").strip()[:50] or "untitled"
+    return "untitled"
+
+
+def produce_local_summary(
+    repo_root: Path, memory_types: List[str], use_dp: bool = False, dp_epsilon: float = 0.1, dp_delta: float = 1e-5
+) -> Dict[str, Any]:
     """
     Produce a local summary from episodic/semantic data (no raw content).
-    Returns dict suitable for sending to coordinator (e.g. topic counts, fact hashes).
+    Returns dict with topic counts and fact hashes suitable for coordinator.
     """
     current_dir = repo_root / "current"
-    summary = {"memory_types": memory_types, "topics": {}, "fact_count": 0}
+    summary = {"memory_types": memory_types, "topics": {}, "topic_hashes": {}, "fact_count": 0}
+    all_fact_hashes: List[str] = []
+
     for mtype in memory_types:
         d = current_dir / mtype
         if not d.exists():
+            summary["topics"][mtype] = 0
+            summary["topic_hashes"][mtype] = []
             continue
-        count = 0
+        topic_to_count: Dict[str, int] = {}
+        topic_to_hashes: Dict[str, List[str]] = {}
         for f in d.rglob("*.md"):
-            if f.is_file():
-                count += 1
-        summary["topics"][mtype] = count
+            if not f.is_file():
+                continue
+            try:
+                content = f.read_text(encoding="utf-8", errors="replace")
+            except Exception:
+                continue
+            normalized = _normalize_for_hash(content)
+            if normalized:
+                h = hashlib.sha256(normalized.encode()).hexdigest()
+                all_fact_hashes.append(h)
+            topic = _extract_topic_from_md(f, content)
+            topic_to_count[topic] = topic_to_count.get(topic, 0) + 1
+            topic_to_hashes.setdefault(topic, []).append(h)
+        summary["topics"][mtype] = sum(topic_to_count.values())
+        summary["topic_hashes"][mtype] = list(topic_to_hashes.keys())
         if mtype == "semantic":
-            summary["fact_count"] = count
+            summary["fact_count"] = len(all_fact_hashes)
+
+    if use_dp and dp_epsilon and dp_delta:
+        from .privacy_budget import add_noise
+        for mtype in summary["topics"]:
+            raw = summary["topics"][mtype]
+            summary["topics"][mtype] = max(0, int(round(add_noise(float(raw), 1.0, dp_epsilon, dp_delta))))
+        summary["fact_count"] = max(0, int(round(add_noise(float(summary["fact_count"]), 1.0, dp_epsilon, dp_delta))))
+
     return summary
 
 
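Taken together, the new config and summary code can be driven roughly as follows (illustrative usage, not from the package; the memvcs.core.federated import path is inferred from the file header above, and the current/<memory_type>/*.md layout comes from the code itself):

from pathlib import Path

from memvcs.core.federated import get_federated_config, produce_local_summary

repo_root = Path(".")  # an agmem repo containing current/<memory_type>/*.md files
cfg = get_federated_config(repo_root)
if cfg is not None:
    summary = produce_local_summary(
        repo_root,
        cfg["memory_types"],
        use_dp=cfg.get("use_dp", False),
        dp_epsilon=cfg.get("dp_epsilon", 0.1),
        dp_delta=cfg.get("dp_delta", 1e-5),
    )
    # Only counts, topics, and hashes are reported; raw note content never leaves the machine
    print(summary["topics"], summary["fact_count"])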
memvcs/core/gardener.py CHANGED
@@ -43,6 +43,9 @@ class GardenerConfig:
     llm_provider: Optional[str] = None  # "openai", "anthropic", etc.
     llm_model: Optional[str] = None
     auto_commit: bool = True
+    use_dp: bool = False
+    dp_epsilon: Optional[float] = None
+    dp_delta: Optional[float] = None
 
 
 @dataclass
@@ -351,14 +354,20 @@ class Gardener:
         except ValueError:
             insight_path = self.semantic_dir / f"insight-{timestamp}.md"
 
-        # Generate frontmatter
+        # Generate frontmatter (optionally noised for differential privacy)
+        source_episodes = len(cluster.episodes)
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            source_episodes = max(0, int(round(add_noise(
+                float(source_episodes), 1.0, self.config.dp_epsilon, self.config.dp_delta
+            ))))
         frontmatter = {
             "schema_version": "1.0",
             "last_updated": datetime.utcnow().isoformat() + "Z",
             "source_agent_id": "gardener",
             "memory_type": "semantic",
             "tags": cluster.tags + ["auto-generated", "insight"],
-            "source_episodes": len(cluster.episodes),
+            "source_episodes": source_episodes,
         }
 
         # Write file
@@ -487,11 +496,21 @@ class Gardener:
         except Exception as e:
             print(f"Warning: Auto-commit failed: {e}")
 
+        clusters_found = len(clusters)
+        insights_generated = insights_written
+        episodes_archived = archived_count
+        if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
+            from .privacy_budget import add_noise
+            sensitivity = 1.0
+            clusters_found = max(0, int(round(add_noise(float(clusters_found), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            insights_generated = max(0, int(round(add_noise(float(insights_generated), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+            episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
+
         return GardenerResult(
             success=True,
-            clusters_found=len(clusters),
-            insights_generated=insights_written,
-            episodes_archived=archived_count,
+            clusters_found=clusters_found,
+            insights_generated=insights_generated,
+            episodes_archived=episodes_archived,
             commit_hash=commit_hash,
             message=f"Processed {len(clusters)} clusters, generated {insights_written} insights",
         )
@@ -1,25 +1,162 @@
 """
-IPFS remote for agmem (stub).
+IPFS remote for agmem.
 
-Push/pull via CIDs; pinning; gateway fallback when daemon unavailable.
-Requires optional ipfs extra (ipfshttpclient or gateway requests).
+Push/pull via CIDs using HTTP gateway (POST /api/v0/add, GET /ipfs/<cid>).
+Optional ipfshttpclient for local daemon.
 """
 
+import json
+import struct
+import zlib
 from pathlib import Path
-from typing import Optional, Set
+from typing import Optional, Set, Dict, Tuple
 
 from .objects import ObjectStore
 from .remote import _collect_objects_from_commit
 
+# Type byte for bundle (same as pack)
+_TYPE_BLOB = 1
+_TYPE_TREE = 2
+_TYPE_COMMIT = 3
+_TYPE_TAG = 4
+_TYPE_TO_BYTE = {"blob": _TYPE_BLOB, "tree": _TYPE_TREE, "commit": _TYPE_COMMIT, "tag": _TYPE_TAG}
+_BYTE_TO_TYPE = {v: k for k, v in _TYPE_TO_BYTE.items()}
+
+
+def _get_object_type_and_content(store: ObjectStore, hash_id: str) -> Optional[Tuple[str, bytes]]:
+    """Return (obj_type, raw_content) for a hash, or None."""
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        content = store.retrieve(hash_id, obj_type)
+        if content is not None:
+            return (obj_type, content)
+    return None
+
+
+def _bundle_objects(store: ObjectStore, hash_ids: Set[str]) -> bytes:
+    """Bundle objects into a single byte blob: count + [hash(32) type(1) len(4) zlib_payload]."""
+    entries = []
+    for h in sorted(hash_ids):
+        pair = _get_object_type_and_content(store, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        h_bin = bytes.fromhex(h) if len(h) == 64 else h.encode().ljust(32)[:32]
+        entries.append((h_bin, _TYPE_TO_BYTE.get(obj_type, _TYPE_BLOB), compressed))
+    parts = [struct.pack(">I", len(entries))]
+    for h_bin, type_byte, compressed in entries:
+        parts.append(h_bin)
+        parts.append(bytes([type_byte]))
+        parts.append(struct.pack(">I", len(compressed)))
+        parts.append(compressed)
+    return b"".join(parts)
+
+
+def _unbundle_objects(data: bytes, objects_dir: Path) -> int:
+    """Unbundle and write loose objects. Returns count written."""
+    if len(data) < 4:
+        return 0
+    count = struct.unpack(">I", data[:4])[0]
+    offset = 4
+    written = 0
+    for _ in range(count):
+        if offset + 32 + 1 + 4 > len(data):
+            break
+        h_bin = data[offset : offset + 32]
+        offset += 32
+        type_byte = data[offset]
+        offset += 1
+        comp_len = struct.unpack(">I", data[offset : offset + 4])[0]
+        offset += 4
+        if offset + comp_len > len(data):
+            break
+        compressed = data[offset : offset + comp_len]
+        offset += comp_len
+        obj_type = _BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            continue
+        null_idx = full.index(b"\0")
+        # Validate header
+        prefix = full[:null_idx].decode()
+        if " " not in prefix:
+            continue
+        name, size_str = prefix.split(" ", 1)
+        hash_hex = h_bin.hex() if len(h_bin) == 32 else h_bin.decode().strip()
+        if len(hash_hex) < 4:
+            continue
+        obj_path = objects_dir / obj_type / hash_hex[:2] / hash_hex[2:]
+        obj_path.parent.mkdir(parents=True, exist_ok=True)
+        obj_path.write_bytes(compressed)
+        written += 1
+    return written
+
+
+def _add_to_ipfs_gateway(bundle: bytes, gateway_url: str) -> Optional[str]:
+    """POST bundle to IPFS gateway /api/v0/add (multipart). Returns CID or None."""
+    boundary = "----agmem-boundary-" + str(abs(hash(bundle)))[:12]
+    body = (
+        b"--" + boundary.encode() + b"\r\n"
+        b'Content-Disposition: form-data; name="file"; filename="agmem-bundle.bin"\r\n'
+        b"Content-Type: application/octet-stream\r\n\r\n"
+        + bundle + b"\r\n"
+        b"--" + boundary.encode() + b"--\r\n"
+    )
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/api/v0/add"
+        req = urllib.request.Request(url, data=body, method="POST")
+        req.add_header("Content-Type", "multipart/form-data; boundary=" + boundary)
+        req.add_header("Content-Length", str(len(body)))
+        with urllib.request.urlopen(req, timeout=120) as resp:
+            if resp.status != 200:
+                return None
+            data = json.loads(resp.read().decode())
+            return data.get("Hash") or data.get("Name")
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/api/v0/add"
+            r = requests.post(
+                url,
+                files={"file": ("agmem-bundle.bin", bundle, "application/octet-stream")},
+                timeout=120,
+            )
+            if r.status_code != 200:
+                return None
+            return r.json().get("Hash") or r.json().get("Name")
+        except Exception:
+            return None
+
 
 def push_to_ipfs(
     objects_dir: Path,
     branch: str,
     commit_hash: str,
     gateway_url: str = "https://ipfs.io",
+    store: Optional[ObjectStore] = None,
 ) -> Optional[str]:
-    """Push branch objects to IPFS and return root CID. Stub: returns None until IPFS client added."""
-    return None
+    """
+    Push branch objects to IPFS and return root CID.
+    Uses gateway POST /api/v0/add (multipart).
+    """
+    if store is None:
+        store = ObjectStore(objects_dir)
+    try:
+        reachable = _collect_objects_from_commit(store, commit_hash)
+    except Exception:
+        return None
+    if not reachable:
+        return None
+    bundle = _bundle_objects(store, reachable)
+    return _add_to_ipfs_gateway(bundle, gateway_url)
 
 
 def pull_from_ipfs(
@@ -27,8 +164,32 @@ def pull_from_ipfs(
     cid: str,
     gateway_url: str = "https://ipfs.io",
 ) -> bool:
-    """Pull objects by CID from IPFS into objects_dir. Stub: returns False until IPFS client added."""
-    return False
+    """
+    Pull objects by CID from IPFS into objects_dir (loose objects).
+    Uses GET gateway_url/ipfs/<cid>.
+    """
+    try:
+        import urllib.request
+
+        url = gateway_url.rstrip("/") + "/ipfs/" + cid
+        req = urllib.request.Request(url, method="GET")
+        with urllib.request.urlopen(req, timeout=60) as resp:
+            if resp.status != 200:
+                return False
+            data = resp.read()
+    except Exception:
+        try:
+            import requests
+
+            url = gateway_url.rstrip("/") + "/ipfs/" + cid
+            r = requests.get(url, timeout=60)
+            if r.status_code != 200:
+                return False
+            data = r.content
+        except Exception:
+            return False
+    written = _unbundle_objects(data, objects_dir)
+    return written > 0
 
 
 def parse_ipfs_url(url: str) -> Optional[str]:
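The push path packs all reachable objects into a single binary payload with the layout documented in _bundle_objects: a 4-byte big-endian entry count, then per entry a 32-byte hash, one type byte, a 4-byte compressed length, and the zlib-compressed "<type> <size>\0" header plus content. A minimal standalone reader for that layout (hypothetical helper, not part of the package) would be:

import struct
import zlib

def iter_bundle_entries(data: bytes):
    # Layout: count(4, big-endian) + per entry: hash(32) type(1) len(4) zlib payload
    (count,) = struct.unpack(">I", data[:4])
    offset = 4
    for _ in range(count):
        obj_hash = data[offset:offset + 32].hex(); offset += 32
        type_byte = data[offset]; offset += 1
        (length,) = struct.unpack(">I", data[offset:offset + 4]); offset += 4
        payload = zlib.decompress(data[offset:offset + length]); offset += length
        yield obj_hash, type_byte, payload  # payload is the "<type> <size>\0" header plus raw content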
@@ -84,7 +84,14 @@ class KnowledgeGraphBuilder:
     1. Wikilinks: [[filename]] references
     2. Semantic similarity: Using embeddings
     3. Shared tags: Files with common tags
-    4. Co-occurrence: Facts in same episodic session (optional)
+    4. Co-occurrence: Files that mention the same entity (e.g. same section/session)
+    5. Causal: Phrases like "caused by", "because of" linking concepts (when derivable)
+    6. Entity: Person/place/thing links (simple keyword or pattern)
+
+    Incremental updates: To update when new files are added without full rebuild,
+    filter the file list to new/changed paths, run build_graph logic for that subset,
+    and merge new nodes/edges into the existing graph (or re-run build_graph; cost is
+    linear in file count).
     """
 
     # Pattern for wikilinks: [[target]] or [[target|display text]]
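The incremental-update note in the docstring above suggests building a graph for only the new or changed files and merging it into the existing one. One possible merge step is sketched below; it is illustrative only, uses a caller-supplied node_key because the node identifier field is not shown in this diff, and relies only on the GraphEdge source/target/edge_type attributes that do appear here:

def merge_graph_data(existing_nodes, existing_edges, delta_nodes, delta_edges, node_key):
    # Replace or add nodes for changed files, keyed by whatever uniquely identifies a node
    nodes = {node_key(n): n for n in existing_nodes}
    for n in delta_nodes:
        nodes[node_key(n)] = n
    # Append only edges not already present
    seen = {(e.source, e.target, e.edge_type) for e in existing_edges}
    edges = list(existing_edges)
    for e in delta_edges:
        key = (e.source, e.target, e.edge_type)
        if key not in seen:
            seen.add(key)
            edges.append(e)
    return list(nodes.values()), edges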
@@ -262,7 +269,22 @@ class KnowledgeGraphBuilder:
         except Exception:
             pass  # Skip similarity if vector store fails
 
+        # Add co-occurrence edges (files sharing entities)
+        try:
+            edges.extend(self._build_cooccurrence_edges(file_paths, file_contents))
+        except Exception:
+            pass
+
+        # Add causal edges (phrases like "caused by", "because of" linking to another file)
+        try:
+            edges.extend(self._build_causal_edges(file_contents))
+        except Exception:
+            pass
+
         # Build metadata
+        edge_type_counts = defaultdict(int)
+        for e in edges:
+            edge_type_counts[e.edge_type] += 1
         metadata = {
             "total_nodes": len(nodes),
             "total_edges": len(edges),
@@ -274,15 +296,64 @@ class KnowledgeGraphBuilder:
                     1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
                 ),
             },
-            "edge_types": {
-                "reference": sum(1 for e in edges if e.edge_type == "reference"),
-                "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
-                "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
-            },
+            "edge_types": dict(edge_type_counts),
         }
 
         return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
 
+    def _extract_entities_simple(self, content: str) -> Set[str]:
+        """Extract simple entity tokens (capitalized words, key phrases) for co-occurrence."""
+        entities = set()
+        for word in re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", content):
+            if len(word) > 2:
+                entities.add(word.lower())
+        for phrase in ["user", "project", "agent", "memory", "preference", "workflow"]:
+            if phrase in content.lower():
+                entities.add(phrase)
+        return entities
+
+    def _build_cooccurrence_edges(
+        self, file_paths: List[str], file_contents: Dict[str, str]
+    ) -> List[GraphEdge]:
+        """Build edges between files that share at least one entity (co-occurrence)."""
+        file_entities: Dict[str, Set[str]] = {}
+        for path, content in file_contents.items():
+            file_entities[path] = self._extract_entities_simple(content)
+        edges = []
+        paths_list = list(file_paths)
+        for i, path1 in enumerate(paths_list):
+            for path2 in paths_list[i + 1 :]:
+                common = file_entities.get(path1, set()) & file_entities.get(path2, set())
+                if common:
+                    w = min(1.0, 0.3 + 0.1 * len(common))
+                    edge = GraphEdge(source=path1, target=path2, edge_type="co_occurrence", weight=w)
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(path1, path2, type="co_occurrence", weight=w)
+        return edges
+
+    def _build_causal_edges(self, file_contents: Dict[str, str]) -> List[GraphEdge]:
+        """Build edges when content has causal phrases linking to another file (e.g. caused by [[X]])."""
+        causal_phrases = re.compile(
+            r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
+            re.IGNORECASE,
+        )
+        edges = []
+        for source_path, content in file_contents.items():
+            for m in causal_phrases.finditer(content):
+                target = m.group(1) or m.group(2)
+                if not target:
+                    continue
+                target_path = self._normalize_link_target(target.strip(), source_path)
+                if target_path and target_path in file_contents and target_path != source_path:
+                    edge = GraphEdge(
+                        source=source_path, target=target_path, edge_type="causal", weight=0.7
+                    )
+                    edges.append(edge)
+                    if self._graph is not None:
+                        self._graph.add_edge(source_path, target_path, type="causal", weight=0.7)
+        return edges
+
     def _build_similarity_edges(
         self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
     ) -> List[GraphEdge]:
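For a sense of what the new causal pattern matches, here is a standalone check (hypothetical snippet, not part of the package) using the regex added above; a wikilink target lands in group 1 and a bare word in group 2:

import re

causal_phrases = re.compile(
    r"(?:caused by|because of|led to|due to)\s+(?:\[\[([^\]]+)\]\]|(\w+))",
    re.IGNORECASE,
)

m = causal_phrases.search("The outage was caused by [[rate-limiter]] misconfiguration.")
print(m.group(1))  # rate-limiter (resolved to a file via _normalize_link_target)
m = causal_phrases.search("Slow builds due to caching")
print(m.group(2))  # caching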
memvcs/core/objects.py CHANGED
@@ -83,7 +83,7 @@ class ObjectStore:
 
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
-        Retrieve content by hash ID.
+        Retrieve content by hash ID (loose object or pack).
 
         Args:
             hash_id: SHA-256 hash of the object
@@ -94,31 +94,41 @@
         """
         obj_path = self._get_object_path(hash_id, obj_type)
 
-        if not obj_path.exists():
-            return None
-
-        raw = obj_path.read_bytes()
-        # Optionally decrypt (iv+tag minimum 12+16 bytes)
-        if self._encryptor and len(raw) >= 12 + 16:
-            try:
-                raw = self._encryptor.decrypt_payload(raw)
-            except Exception:
-                pass  # legacy plain compressed
-        full_content = zlib.decompress(raw)
-
-        # Parse header
-        null_idx = full_content.index(b"\0")
-        header = full_content[:null_idx].decode()
-        content = full_content[null_idx + 1 :]
-
-        return content
+        if obj_path.exists():
+            raw = obj_path.read_bytes()
+            # Optionally decrypt (iv+tag minimum 12+16 bytes)
+            if self._encryptor and len(raw) >= 12 + 16:
+                try:
+                    raw = self._encryptor.decrypt_payload(raw)
+                except Exception:
+                    pass  # legacy plain compressed
+            full_content = zlib.decompress(raw)
+            null_idx = full_content.index(b"\0")
+            content = full_content[null_idx + 1 :]
+            return content
+
+        # Try pack file when loose object missing
+        try:
+            from .pack import retrieve_from_pack
+            result = retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type)
+            if result is not None:
+                return result[1]
+        except Exception:
+            pass
+        return None
 
     def exists(self, hash_id: str, obj_type: str) -> bool:
-        """Check if an object exists. Returns False for invalid hash (no raise)."""
+        """Check if an object exists (loose or pack). Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
-        return obj_path.exists()
+        if obj_path.exists():
+            return True
+        try:
+            from .pack import retrieve_from_pack
+            return retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type) is not None
+        except Exception:
+            return False
 
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""