roampal 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. roampal/__init__.py +29 -0
  2. roampal/__main__.py +6 -0
  3. roampal/backend/__init__.py +1 -0
  4. roampal/backend/modules/__init__.py +1 -0
  5. roampal/backend/modules/memory/__init__.py +43 -0
  6. roampal/backend/modules/memory/chromadb_adapter.py +623 -0
  7. roampal/backend/modules/memory/config.py +102 -0
  8. roampal/backend/modules/memory/content_graph.py +543 -0
  9. roampal/backend/modules/memory/context_service.py +455 -0
  10. roampal/backend/modules/memory/embedding_service.py +96 -0
  11. roampal/backend/modules/memory/knowledge_graph_service.py +1052 -0
  12. roampal/backend/modules/memory/memory_bank_service.py +433 -0
  13. roampal/backend/modules/memory/memory_types.py +296 -0
  14. roampal/backend/modules/memory/outcome_service.py +400 -0
  15. roampal/backend/modules/memory/promotion_service.py +473 -0
  16. roampal/backend/modules/memory/routing_service.py +444 -0
  17. roampal/backend/modules/memory/scoring_service.py +324 -0
  18. roampal/backend/modules/memory/search_service.py +646 -0
  19. roampal/backend/modules/memory/tests/__init__.py +1 -0
  20. roampal/backend/modules/memory/tests/conftest.py +12 -0
  21. roampal/backend/modules/memory/tests/unit/__init__.py +1 -0
  22. roampal/backend/modules/memory/tests/unit/conftest.py +7 -0
  23. roampal/backend/modules/memory/tests/unit/test_knowledge_graph_service.py +517 -0
  24. roampal/backend/modules/memory/tests/unit/test_memory_bank_service.py +504 -0
  25. roampal/backend/modules/memory/tests/unit/test_outcome_service.py +485 -0
  26. roampal/backend/modules/memory/tests/unit/test_scoring_service.py +255 -0
  27. roampal/backend/modules/memory/tests/unit/test_search_service.py +413 -0
  28. roampal/backend/modules/memory/tests/unit/test_unified_memory_system.py +418 -0
  29. roampal/backend/modules/memory/unified_memory_system.py +1277 -0
  30. roampal/cli.py +638 -0
  31. roampal/hooks/__init__.py +16 -0
  32. roampal/hooks/session_manager.py +587 -0
  33. roampal/hooks/stop_hook.py +176 -0
  34. roampal/hooks/user_prompt_submit_hook.py +103 -0
  35. roampal/mcp/__init__.py +7 -0
  36. roampal/mcp/server.py +611 -0
  37. roampal/server/__init__.py +7 -0
  38. roampal/server/main.py +744 -0
  39. roampal-0.1.4.dist-info/METADATA +179 -0
  40. roampal-0.1.4.dist-info/RECORD +44 -0
  41. roampal-0.1.4.dist-info/WHEEL +5 -0
  42. roampal-0.1.4.dist-info/entry_points.txt +2 -0
  43. roampal-0.1.4.dist-info/licenses/LICENSE +190 -0
  44. roampal-0.1.4.dist-info/top_level.txt +1 -0
roampal/backend/modules/memory/config.py
@@ -0,0 +1,102 @@
+ """
+ Memory System Configuration
+ 
+ Centralizes all magic numbers and configuration constants for the memory system.
+ Extracted from UnifiedMemorySystem lines 202-206, 709, 993, 398, 4221, and scattered
+ hardcoded values throughout the codebase.
+ 
+ This replaces scattered class constants with a single, configurable dataclass.
+ """
+ 
+ from dataclasses import dataclass, field
+ from typing import Optional
+ 
+ 
+ @dataclass
+ class MemoryConfig:
+     """
+     Configuration for the unified memory system.
+ 
+     All values have production defaults matching the original codebase.
+     Can be overridden for testing or different deployment scenarios.
+     """
+ 
+     # Threshold constants (from lines 202-206)
+     high_value_threshold: float = 0.9
+     """Memories above this score are preserved beyond the retention period"""
+ 
+     promotion_score_threshold: float = 0.7
+     """Minimum score for working->history or history->patterns promotion"""
+ 
+     demotion_score_threshold: float = 0.4
+     """Below this, patterns demote to history"""
+ 
+     deletion_score_threshold: float = 0.2
+     """Below this, history items are deleted"""
+ 
+     new_item_deletion_threshold: float = 0.1
+     """More lenient deletion threshold for items < 7 days old"""
+ 
+     # Search and scoring (from lines 709, 1580-1581)
+     cross_encoder_blend_ratio: float = 0.6
+     """Weight given to cross-encoder in final ranking (0.4 original, 0.6 cross-encoder)"""
+ 
+     embedding_weight_proven: float = 0.2
+     """Embedding weight for proven memories (high use count)"""
+ 
+     learned_weight_proven: float = 0.8
+     """Learned/outcome weight for proven memories"""
+ 
+     embedding_weight_new: float = 0.8
+     """Embedding weight for new memories (low use count)"""
+ 
+     learned_weight_new: float = 0.2
+     """Learned/outcome weight for new memories"""
+ 
+     # Multipliers (from lines 1359, 1398, 1413, 1421)
+     search_multiplier: int = 3
+     """
+     Fetch limit * search_multiplier results for better ranking.
+     Currently hardcoded as `limit * 3` in 4 locations.
+     """
+ 
+     # Storage limits (from line 4221)
+     max_memory_bank_items: int = 1000
+     """Maximum items in memory_bank collection"""
+ 
+     # Timing (from line 398)
+     kg_debounce_seconds: int = 5
+     """Debounce window for knowledge graph saves"""
+ 
+     # Default values (from line 993)
+     default_importance: float = 0.7
+     """Default importance for new memories"""
+ 
+     default_confidence: float = 0.7
+     """Default confidence for new memories"""
+ 
+     # Promotion timing
+     promotion_use_threshold: int = 3
+     """Minimum uses before considering for promotion"""
+ 
+     promotion_age_days: int = 7
+     """Minimum age in days before considering for promotion"""
+ 
+     # Retention periods
+     working_memory_retention_hours: int = 24
+     """Working memory cleanup threshold"""
+ 
+     history_retention_days: int = 30
+     """History cleanup threshold for low-value items"""
+ 
+     # Cross-encoder settings
+     cross_encoder_top_k: int = 30
+     """Number of candidates to rerank with cross-encoder"""
+ 
+     # Wilson score
+     wilson_confidence: float = 0.95
+     """Confidence level for Wilson score calculation"""
+ 
+ 
+ # Default configuration instance
+ DEFAULT_CONFIG = MemoryConfig()
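
For orientation, a minimal usage sketch of this dataclass; the override values below are illustrative test values, not anything shipped in the package:

    from roampal.backend.modules.memory.config import MemoryConfig, DEFAULT_CONFIG

    # Production code can read the shared default instance
    assert DEFAULT_CONFIG.promotion_score_threshold == 0.7

    # Tests override only the fields they care about; every other
    # field keeps its production default from the dataclass
    fast_cleanup = MemoryConfig(
        working_memory_retention_hours=1,  # illustrative test value
        history_retention_days=1,          # illustrative test value
    )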
roampal/backend/modules/memory/content_graph.py
@@ -0,0 +1,543 @@
+ """
+ Content Knowledge Graph - Indexes entity relationships from memory_bank content.
+ 
+ Part of the dual KG system:
+ - Routing KG: Query patterns → collection routing decisions
+ - Content KG: Memory content → entity relationships (THIS FILE)
+ 
+ Author: Roampal AI
+ Date: 2025-11-06
+ Version: v0.2.0
+ """
+ 
+ import json
+ import logging
+ from typing import Dict, List, Set, Tuple, Any, Optional, Callable
+ from collections import defaultdict, deque
+ from datetime import datetime
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ class ContentGraph:
+     """
+     Content-based knowledge graph that indexes entity relationships from memory_bank content.
+ 
+     Key differences from routing KG:
+     - Data source: memory_bank text content (not queries)
+     - Purpose: Entity relationship mapping (not search routing)
+     - Storage: content_graph.json (separate from knowledge_graph.json)
+ 
+     Graph structure:
+     {
+         "entities": {
+             "logan": {
+                 "mentions": 12,
+                 "collections": {"memory_bank": 12},
+                 "documents": ["doc_id_1", "doc_id_2", ...],
+                 "first_seen": "2025-11-06T10:30:00",
+                 "last_seen": "2025-11-06T14:45:00"
+             }
+         },
+         "relationships": {
+             "logan__everbright": {
+                 "entities": ["logan", "everbright"],
+                 "strength": 5.66,
+                 "co_occurrences": 8,
+                 "documents": ["doc_id_1", "doc_id_2", ...],
+                 "first_seen": "2025-11-06T10:30:00",
+                 "last_seen": "2025-11-06T14:45:00"
+             }
+         },
+         "metadata": {
+             "version": "0.2.0",
+             "created": "2025-11-06T10:00:00",
+             "last_updated": "2025-11-06T14:45:00",
+             "total_documents": 29,
+             "total_entities": 47,
+             "total_relationships": 123
+         }
+     }
+     """
+ 
+     def __init__(self):
+         """Initialize empty content graph."""
+         self.entities: Dict[str, Dict[str, Any]] = {}
+         self.relationships: Dict[str, Dict[str, Any]] = {}
+         self.entity_metadata: Dict[str, Dict[str, Any]] = {}
+         self.metadata = {
+             "version": "0.2.0",
+             "created": datetime.now().isoformat(),
+             "last_updated": datetime.now().isoformat(),
+             "total_documents": 0,
+             "total_entities": 0,
+             "total_relationships": 0
+         }
+ 
+         # Document tracking for relationship building
+         self._doc_entities: Dict[str, Set[str]] = defaultdict(set)
+ 
+         logger.info("ContentGraph initialized (empty)")
+ 
+     def add_entities_from_text(
+         self,
+         text: str,
+         doc_id: str,
+         collection: str,
+         extract_concepts_fn: Callable[[str], List[str]],
+         quality_score: Optional[float] = None
+     ) -> List[str]:
+         """
+         Extract entities from text and index them in the content graph.
+ 
+         Args:
+             text: Raw text content to analyze
+             doc_id: Unique document identifier
+             collection: Collection name (e.g., "memory_bank")
+             extract_concepts_fn: Function to extract concepts (reuses _extract_concepts from UnifiedMemorySystem)
+             quality_score: Optional quality score (importance × confidence) for this document's entities
+ 
+         Returns:
+             List of extracted entity names
+ 
+         Example:
+             >>> cg = ContentGraph()
+             >>> entities = cg.add_entities_from_text(
+             ...     text="User prefers Docker Compose for local development environments",
+             ...     doc_id="mem_001",
+             ...     collection="memory_bank",
+             ...     extract_concepts_fn=memory_system._extract_concepts
+             ... )
+             >>> entities
+             ['docker', 'compose', 'local', 'development', 'environments']
+         """
+         # Extract concepts using same logic as routing KG (consistency)
+         concepts = extract_concepts_fn(text)
+ 
+         # Filter to meaningful entities (exclude very short/generic terms)
+         entities = [c for c in concepts if len(c) >= 3 and not c.isdigit()]
+ 
+         if not entities:
+             logger.debug(f"No entities extracted from doc {doc_id}")
+             return []
+ 
+         now = datetime.now().isoformat()
+ 
+         # Index each entity
+         for entity in entities:
+             if entity not in self.entities:
+                 self.entities[entity] = {
+                     "mentions": 0,
+                     "collections": defaultdict(int),
+                     "documents": [],
+                     "first_seen": now,
+                     "last_seen": now,
+                     "total_quality": 0.0,
+                     "avg_quality": 0.0
+                 }
+ 
+             # Update entity metadata
+             self.entities[entity]["mentions"] += 1
+             self.entities[entity]["collections"][collection] += 1
+             self.entities[entity]["last_seen"] = now
+ 
+             if doc_id not in self.entities[entity]["documents"]:
+                 self.entities[entity]["documents"].append(doc_id)
+ 
+             # Update quality scores if provided
+             if quality_score is not None:
+                 self.entities[entity]["total_quality"] += quality_score
+                 self.entities[entity]["avg_quality"] = (
+                     self.entities[entity]["total_quality"] / self.entities[entity]["mentions"]
+                 )
+ 
+         # Track entities in this document for relationship building
+         self._doc_entities[doc_id].update(entities)
+ 
+         # Build relationships (co-occurrence within same document)
+         self._build_relationships(entities, doc_id, now)
+ 
+         # Update metadata
+         self.metadata["last_updated"] = now
+         self.metadata["total_entities"] = len(self.entities)
+         self.metadata["total_relationships"] = len(self.relationships)
+         self.metadata["total_documents"] = len(self._doc_entities)
+ 
+         logger.info(f"Indexed {len(entities)} entities from doc {doc_id}: {entities[:5]}{'...' if len(entities) > 5 else ''}")
+ 
+         return entities
+ 
+     def _build_relationships(self, entities: List[str], doc_id: str, timestamp: str):
+         """
+         Build entity relationships based on co-occurrence in the same document.
+ 
+         Args:
+             entities: List of entities found in document
+             doc_id: Document identifier
+             timestamp: ISO timestamp for tracking
+ 
+         Example:
+             If doc contains ["logan", "everbright", "operations_manager"],
+             creates relationships:
+             - logan <-> everbright
+             - logan <-> operations_manager
+             - everbright <-> operations_manager
+         """
+         # Create pairwise relationships (undirected)
+         unique_entities = list(set(entities))
+ 
+         for i in range(len(unique_entities)):
+             for j in range(i + 1, len(unique_entities)):
+                 entity_a = unique_entities[i]
+                 entity_b = unique_entities[j]
+ 
+                 # Create canonical relationship ID (sorted to ensure consistency)
+                 rel_id = "__".join(sorted([entity_a, entity_b]))
+ 
+                 if rel_id not in self.relationships:
+                     self.relationships[rel_id] = {
+                         "entities": sorted([entity_a, entity_b]),
+                         "strength": 0.0,
+                         "co_occurrences": 0,
+                         "documents": [],
+                         "first_seen": timestamp,
+                         "last_seen": timestamp
+                     }
+ 
+                 # Update relationship metadata
+                 self.relationships[rel_id]["co_occurrences"] += 1
+                 self.relationships[rel_id]["last_seen"] = timestamp
+ 
+                 if doc_id not in self.relationships[rel_id]["documents"]:
+                     self.relationships[rel_id]["documents"].append(doc_id)
+ 
+                 # Calculate relationship strength (co-occurrence count with decay)
+                 # Simple formula: sqrt(co_occurrences) * 2 for diminishing returns
+                 co_occur = self.relationships[rel_id]["co_occurrences"]
+                 self.relationships[rel_id]["strength"] = round(
+                     (co_occur ** 0.5) * 2.0,  # sqrt for diminishing returns, * 2 for scaling
+                     2
+                 )
+ 
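As a quick sanity check of the strength formula (square root of the co-occurrence count, doubled, rounded to two places):

    # strength = round(sqrt(co_occurrences) * 2.0, 2)
    # 1 co-occurrence  -> 2.0
    # 4 co-occurrences -> 4.0
    # 8 co-occurrences -> 5.66  (the value used in the docstring examples)

Growth is sub-linear, so heavily co-mentioned pairs rank higher without dominating unboundedly.
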
+     def remove_entity_mention(self, doc_id: str):
+         """
+         Remove a document's entity mentions (when memory archived/deleted).
+ 
+         Args:
+             doc_id: Document identifier to remove
+ 
+         Notes:
+             - Decrements mention counts
+             - Removes doc from entity.documents
+             - Removes relationships if no more co-occurrences
+             - Does NOT delete entities (preserves historical context)
+         """
+         if doc_id not in self._doc_entities:
+             logger.debug(f"Doc {doc_id} not tracked in content graph")
+             return
+ 
+         entities_in_doc = self._doc_entities[doc_id]
+         now = datetime.now().isoformat()
+ 
+         # Decrement entity mentions
+         for entity in entities_in_doc:
+             if entity in self.entities:
+                 self.entities[entity]["mentions"] -= 1
+ 
+                 if doc_id in self.entities[entity]["documents"]:
+                     self.entities[entity]["documents"].remove(doc_id)
+ 
+                 # Keep entity metadata even if mentions reach 0 (historical context)
+ 
+         # Remove this document from relationships involving these entities
+         rels_to_remove = []
+         for rel_id, rel_data in self.relationships.items():
+             if doc_id in rel_data["documents"]:
+                 rel_data["documents"].remove(doc_id)
+                 rel_data["co_occurrences"] -= 1
+ 
+                 # Recalculate strength
+                 if rel_data["co_occurrences"] > 0:
+                     co_occur = rel_data["co_occurrences"]
+                     rel_data["strength"] = round((co_occur ** 0.5) * 2.0, 2)
+                 else:
+                     # Mark for deletion if no more co-occurrences
+                     rels_to_remove.append(rel_id)
+ 
+         # Clean up relationships with 0 co-occurrences
+         for rel_id in rels_to_remove:
+             del self.relationships[rel_id]
+ 
+         # Remove from tracking
+         del self._doc_entities[doc_id]
+ 
+         # Update metadata
+         self.metadata["last_updated"] = now
+         self.metadata["total_relationships"] = len(self.relationships)
+         self.metadata["total_documents"] = len(self._doc_entities)
+ 
+         logger.info(f"Removed doc {doc_id} from content graph ({len(entities_in_doc)} entities, {len(rels_to_remove)} relationships)")
+ 
+     def get_entity_relationships(self, entity: str, min_strength: float = 0.0) -> List[Dict[str, Any]]:
+         """
+         Get all relationships for a specific entity.
+ 
+         Args:
+             entity: Entity name to query
+             min_strength: Minimum relationship strength threshold
+ 
+         Returns:
+             List of relationships with connected entities and metadata
+ 
+         Example:
+             >>> cg.get_entity_relationships("logan", min_strength=2.0)
+             [
+                 {
+                     "related_entity": "everbright",
+                     "strength": 5.66,
+                     "co_occurrences": 8,
+                     "documents": ["mem_001", "mem_003", ...],
+                     "first_seen": "2025-11-06T10:30:00",
+                     "last_seen": "2025-11-06T14:45:00"
+                 },
+                 ...
+             ]
+         """
+         if entity not in self.entities:
+             logger.debug(f"Entity '{entity}' not found in content graph")
+             return []
+ 
+         relationships = []
+ 
+         for rel_id, rel_data in self.relationships.items():
+             if entity in rel_data["entities"] and rel_data["strength"] >= min_strength:
+                 # Find the other entity in the relationship
+                 related_entity = [e for e in rel_data["entities"] if e != entity][0]
+ 
+                 relationships.append({
+                     "related_entity": related_entity,
+                     "strength": rel_data["strength"],
+                     "co_occurrences": rel_data["co_occurrences"],
+                     "documents": rel_data["documents"],
+                     "first_seen": rel_data["first_seen"],
+                     "last_seen": rel_data["last_seen"]
+                 })
+ 
+         # Sort by strength (descending)
+         relationships.sort(key=lambda x: x["strength"], reverse=True)
+ 
+         logger.debug(f"Found {len(relationships)} relationships for entity '{entity}'")
+         return relationships
+ 
+     def find_path(
+         self,
+         from_entity: str,
+         to_entity: str,
+         max_depth: int = 3
+     ) -> Optional[List[str]]:
+         """
+         Find shortest path between two entities using BFS.
+ 
+         Args:
+             from_entity: Starting entity
+             to_entity: Target entity
+             max_depth: Maximum path length to search
+ 
+         Returns:
+             List of entities forming path, or None if no path found
+ 
+         Example:
+             >>> cg.find_path("logan", "everbright", max_depth=3)
+             ["logan", "everbright"]  # Direct connection
+ 
+             >>> cg.find_path("logan", "solar_energy", max_depth=3)
+             ["logan", "everbright", "solar_energy"]  # 2-hop connection
+         """
+         if from_entity not in self.entities or to_entity not in self.entities:
+             logger.debug(f"Entity not found: from='{from_entity}' to='{to_entity}'")
+             return None
+ 
+         if from_entity == to_entity:
+             return [from_entity]
+ 
+         # BFS to find shortest path
+         queue = deque([(from_entity, [from_entity])])
+         visited = {from_entity}
+ 
+         while queue:
+             current, path = queue.popleft()
+ 
+             if len(path) > max_depth:
+                 continue
+ 
+             # Get neighbors
+             for rel_id, rel_data in self.relationships.items():
+                 if current in rel_data["entities"]:
+                     # Find connected entity
+                     neighbor = [e for e in rel_data["entities"] if e != current][0]
+ 
+                     if neighbor == to_entity:
+                         # Found target
+                         result_path = path + [neighbor]
+                         logger.info(f"Found path: {' -> '.join(result_path)}")
+                         return result_path
+ 
+                     if neighbor not in visited:
+                         visited.add(neighbor)
+                         queue.append((neighbor, path + [neighbor]))
+ 
+         logger.debug(f"No path found between '{from_entity}' and '{to_entity}' (max_depth={max_depth})")
+         return None
+ 
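A minimal end-to-end sketch of indexing and BFS pathfinding; the whitespace tokenizer below is a stand-in for the real `_extract_concepts`, and the entity names are the hypothetical ones from the docstrings:

    cg = ContentGraph()
    tokenize = lambda text: [w.strip(".,").lower() for w in text.split()]  # stand-in extractor

    cg.add_entities_from_text("Logan works at EverBright", "mem_001", "memory_bank", tokenize)
    cg.add_entities_from_text("EverBright finances solar projects", "mem_002", "memory_bank", tokenize)

    cg.find_path("logan", "solar")  # -> ['logan', 'everbright', 'solar'] via the shared entity

Note that find_path() rescans the full relationship dict for each dequeued node, so it is best suited to graphs of the modest size shown in the metadata example.
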
+     def get_all_entities(self, min_mentions: int = 1) -> List[Dict[str, Any]]:
+         """
+         Get all entities with metadata.
+ 
+         Args:
+             min_mentions: Minimum mention count threshold
+ 
+         Returns:
+             List of entities with full metadata
+         """
+         entities = [
+             {
+                 "entity": name,  # Changed from "name" to "entity" for consistency with routing KG
+                 **data
+             }
+             for name, data in self.entities.items()
+             if data["mentions"] >= min_mentions
+         ]
+ 
+         # Sort by avg_quality (descending), falling back to mentions for
+         # entities loaded from older graphs that lack quality data
+         entities.sort(key=lambda x: (x.get("avg_quality", 0.0), x["mentions"]), reverse=True)
+ 
+         return entities
+ 
+     def get_stats(self) -> Dict[str, Any]:
+         """
+         Get content graph statistics.
+ 
+         Returns:
+             Dictionary with graph metrics
+         """
+         return {
+             "total_entities": len(self.entities),
+             "total_relationships": len(self.relationships),
+             "total_documents": len(self._doc_entities),
+             "avg_mentions_per_entity": round(
+                 sum(e["mentions"] for e in self.entities.values()) / len(self.entities), 2
+             ) if self.entities else 0.0,
+             "avg_relationships_per_entity": round(
+                 (len(self.relationships) * 2) / len(self.entities), 2
+             ) if self.entities else 0.0,
+             "strongest_relationship": max(
+                 self.relationships.values(),
+                 key=lambda x: x["strength"],
+                 default=None
+             ),
+             "most_mentioned_entity": max(
+                 self.entities.items(),
+                 key=lambda x: x[1]["mentions"],
+                 default=(None, None)
+             )[0],
+             "metadata": self.metadata
+         }
+ 
+     def to_dict(self) -> Dict[str, Any]:
+         """
+         Serialize content graph to dictionary.
+ 
+         Returns:
+             Complete graph data as dictionary
+         """
+         # Convert defaultdicts to regular dicts for JSON serialization
+         serializable_entities = {}
+         for name, data in self.entities.items():
+             entity_copy = data.copy()
+             if isinstance(entity_copy.get("collections"), defaultdict):
+                 entity_copy["collections"] = dict(entity_copy["collections"])
+             serializable_entities[name] = entity_copy
+ 
+         return {
+             "entities": serializable_entities,
+             "relationships": self.relationships,
+             "metadata": self.metadata
+         }
+ 
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> "ContentGraph":
+         """
+         Deserialize content graph from dictionary.
+ 
+         Args:
+             data: Graph data dictionary
+ 
+         Returns:
+             ContentGraph instance
+         """
+         graph = cls()
+ 
+         # Load entities
+         graph.entities = {}
+         for name, entity_data in data.get("entities", {}).items():
+             entity_copy = entity_data.copy()
+             # Convert collections dict back to defaultdict
+             if "collections" in entity_copy:
+                 entity_copy["collections"] = defaultdict(int, entity_copy["collections"])
+             graph.entities[name] = entity_copy
+ 
+         # Load relationships
+         graph.relationships = data.get("relationships", {})
+ 
+         # Load metadata
+         graph.metadata = data.get("metadata", graph.metadata)
+ 
+         # Rebuild doc_entities tracking
+         for entity_name, entity_data in graph.entities.items():
+             for doc_id in entity_data.get("documents", []):
+                 graph._doc_entities[doc_id].add(entity_name)
+ 
+         logger.info(
+             f"ContentGraph loaded: {len(graph.entities)} entities, "
+             f"{len(graph.relationships)} relationships, "
+             f"{len(graph._doc_entities)} documents"
+         )
+ 
+         return graph
+ 
+     def save_to_file(self, filepath: str):
+         """
+         Save content graph to JSON file.
+ 
+         Args:
+             filepath: Path to save file
+         """
+         try:
+             with open(filepath, "w", encoding="utf-8") as f:
+                 json.dump(self.to_dict(), f, indent=2, ensure_ascii=False)
+             logger.info(f"ContentGraph saved to {filepath}")
+         except Exception as e:
+             logger.error(f"Failed to save ContentGraph to {filepath}: {e}")
+             raise
+ 
+     @classmethod
+     def load_from_file(cls, filepath: str) -> "ContentGraph":
+         """
+         Load content graph from JSON file.
+ 
+         Args:
+             filepath: Path to load file
+ 
+         Returns:
+             ContentGraph instance
+         """
+         try:
+             with open(filepath, "r", encoding="utf-8") as f:
+                 data = json.load(f)
+             graph = cls.from_dict(data)
+             logger.info(f"ContentGraph loaded from {filepath}")
+             return graph
+         except FileNotFoundError:
+             logger.warning(f"ContentGraph file not found: {filepath}, creating new graph")
+             return cls()
+         except Exception as e:
+             logger.error(f"Failed to load ContentGraph from {filepath}: {e}")
+             raise
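
Continuing the `cg` from the earlier sketch, a round trip through the persistence helpers (the file name is illustrative):

    cg.save_to_file("content_graph.json")
    restored = ContentGraph.load_from_file("content_graph.json")

    assert restored.get_stats()["total_entities"] == cg.get_stats()["total_entities"]
    # from_dict() rebuilds _doc_entities from each entity's "documents" list,
    # so remove_entity_mention() keeps working on the restored graph

Since load_from_file() returns a fresh empty graph when the file does not exist, callers can load unconditionally on startup.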