agmem-0.1.1-py3-none-any.whl → agmem-0.1.2-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (80)
  1. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
  2. agmem-0.1.2.dist-info/RECORD +86 -0
  3. memvcs/__init__.py +1 -1
  4. memvcs/cli.py +35 -31
  5. memvcs/commands/__init__.py +9 -9
  6. memvcs/commands/add.py +77 -76
  7. memvcs/commands/blame.py +46 -53
  8. memvcs/commands/branch.py +13 -33
  9. memvcs/commands/checkout.py +27 -32
  10. memvcs/commands/clean.py +18 -23
  11. memvcs/commands/clone.py +4 -1
  12. memvcs/commands/commit.py +40 -39
  13. memvcs/commands/daemon.py +81 -76
  14. memvcs/commands/decay.py +77 -0
  15. memvcs/commands/diff.py +56 -57
  16. memvcs/commands/distill.py +74 -0
  17. memvcs/commands/fsck.py +55 -61
  18. memvcs/commands/garden.py +28 -37
  19. memvcs/commands/graph.py +41 -48
  20. memvcs/commands/init.py +16 -24
  21. memvcs/commands/log.py +25 -40
  22. memvcs/commands/merge.py +16 -28
  23. memvcs/commands/pack.py +129 -0
  24. memvcs/commands/pull.py +4 -1
  25. memvcs/commands/push.py +4 -2
  26. memvcs/commands/recall.py +145 -0
  27. memvcs/commands/reflog.py +13 -22
  28. memvcs/commands/remote.py +1 -0
  29. memvcs/commands/repair.py +66 -0
  30. memvcs/commands/reset.py +23 -33
  31. memvcs/commands/resurrect.py +82 -0
  32. memvcs/commands/search.py +3 -4
  33. memvcs/commands/serve.py +2 -1
  34. memvcs/commands/show.py +66 -36
  35. memvcs/commands/stash.py +34 -34
  36. memvcs/commands/status.py +27 -35
  37. memvcs/commands/tag.py +23 -47
  38. memvcs/commands/test.py +30 -44
  39. memvcs/commands/timeline.py +111 -0
  40. memvcs/commands/tree.py +26 -27
  41. memvcs/commands/verify.py +59 -0
  42. memvcs/commands/when.py +115 -0
  43. memvcs/core/access_index.py +167 -0
  44. memvcs/core/config_loader.py +3 -1
  45. memvcs/core/consistency.py +214 -0
  46. memvcs/core/decay.py +185 -0
  47. memvcs/core/diff.py +158 -143
  48. memvcs/core/distiller.py +277 -0
  49. memvcs/core/gardener.py +164 -132
  50. memvcs/core/hooks.py +48 -14
  51. memvcs/core/knowledge_graph.py +134 -138
  52. memvcs/core/merge.py +248 -171
  53. memvcs/core/objects.py +95 -96
  54. memvcs/core/pii_scanner.py +147 -146
  55. memvcs/core/refs.py +132 -115
  56. memvcs/core/repository.py +174 -164
  57. memvcs/core/schema.py +155 -113
  58. memvcs/core/staging.py +60 -65
  59. memvcs/core/storage/__init__.py +20 -18
  60. memvcs/core/storage/base.py +74 -70
  61. memvcs/core/storage/gcs.py +70 -68
  62. memvcs/core/storage/local.py +42 -40
  63. memvcs/core/storage/s3.py +105 -110
  64. memvcs/core/temporal_index.py +112 -0
  65. memvcs/core/test_runner.py +101 -93
  66. memvcs/core/vector_store.py +41 -35
  67. memvcs/integrations/mcp_server.py +1 -3
  68. memvcs/integrations/web_ui/server.py +25 -26
  69. memvcs/retrieval/__init__.py +22 -0
  70. memvcs/retrieval/base.py +54 -0
  71. memvcs/retrieval/pack.py +128 -0
  72. memvcs/retrieval/recaller.py +105 -0
  73. memvcs/retrieval/strategies.py +314 -0
  74. memvcs/utils/__init__.py +3 -3
  75. memvcs/utils/helpers.py +52 -52
  76. agmem-0.1.1.dist-info/RECORD +0 -67
  77. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
  78. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
  79. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
  80. {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/core/hooks.py CHANGED
@@ -16,15 +16,16 @@ PII_SEVERITY_HIGH = "high"
 @dataclass
 class HookResult:
     """Result of running hooks."""
+
     success: bool
     errors: List[str] = field(default_factory=list)
     warnings: List[str] = field(default_factory=list)
-
+
     def add_error(self, message: str):
         """Add an error and mark as failed."""
         self.errors.append(message)
         self.success = False
-
+
     def add_warning(self, message: str):
         """Add a warning (doesn't affect success)."""
         self.warnings.append(message)
@@ -39,6 +40,7 @@ def _pii_staged_files_to_scan(repo, staged_files: Dict[str, Any]) -> Dict[str, A
     """Return staged files to scan for PII (excludes allowlisted paths)."""
     try:
         from .config_loader import load_agmem_config, pii_enabled, pii_allowlist
+
         config = load_agmem_config(getattr(repo, "root", None))
     except ImportError:
         return staged_files
@@ -58,6 +60,7 @@ def _run_pii_hook(repo, staged_files: Dict[str, Any], result: HookResult) -> Non
     """Run PII scanner on staged files; high severity → error, else → warning."""
     try:
         from .pii_scanner import PIIScanner
+
         to_scan = _pii_staged_files_to_scan(repo, staged_files)
         pii_result = PIIScanner.scan_staged_files(repo, to_scan)
         if not pii_result.has_issues:
@@ -77,11 +80,11 @@ def _run_pii_hook(repo, staged_files: Dict[str, Any], result: HookResult) -> Non
 def run_pre_commit_hooks(repo, staged_files: Dict[str, Any]) -> HookResult:
     """
     Run all pre-commit hooks on staged files.
-
+
     Args:
         repo: Repository instance
         staged_files: Dict of staged files with their info
-
+
     Returns:
         HookResult with success status and any errors/warnings
     """
@@ -93,42 +96,42 @@ def run_pre_commit_hooks(repo, staged_files: Dict[str, Any]) -> HookResult:
             result.add_error(error)
         for warning in file_type_result.warnings:
             result.add_warning(warning)
-
+
     return result


 def validate_file_types(repo, staged_files: Dict[str, Any]) -> HookResult:
     """
     Validate that staged files are allowed types.
-
+
     Args:
         repo: Repository instance
         staged_files: Dict of staged files
-
+
     Returns:
         HookResult with validation status
     """
     result = HookResult(success=True)
-
+
     # Get config for allowed extensions
     config = repo.get_config()
-    allowed_extensions = config.get('allowed_extensions', ['.md', '.txt', '.json', '.yaml', '.yml'])
-
+    allowed_extensions = config.get("allowed_extensions", [".md", ".txt", ".json", ".yaml", ".yml"])
+
     for filepath in staged_files.keys():
         path = Path(filepath)
         ext = path.suffix.lower()
-
+
         # Skip files without extensions (might be valid)
         if not ext:
             continue
-
+
         # Check if extension is allowed
         if ext not in allowed_extensions:
             result.add_warning(
                 f"File '{filepath}' has extension '{ext}' which may not be optimal for memory storage. "
                 f"Recommended: {', '.join(allowed_extensions)}"
             )
-
+
     return result


@@ -139,7 +142,7 @@ _registered_hooks: List[Callable] = []
 def register_hook(hook_fn: Callable):
     """
     Register a custom pre-commit hook.
-
+
     Args:
         hook_fn: Function that takes (repo, staged_files) and returns HookResult
     """
@@ -149,3 +152,34 @@ def register_hook(hook_fn: Callable):
 def get_registered_hooks() -> List[Callable]:
     """Get all registered hooks."""
     return _registered_hooks.copy()
+
+
+def compute_suggested_importance(
+    repo: Any,
+    staged_files: Dict[str, Any],
+    message: str,
+    metadata: Optional[Dict[str, Any]] = None,
+) -> float:
+    """
+    Compute suggested importance score from heuristics.
+
+    Scoring factors: user emphasis in message, source authority (auto_commit), etc.
+
+    Returns:
+        Float 0.0-1.0; default 0.5 if no heuristics match.
+    """
+    metadata = metadata or {}
+    message_lower = message.lower()
+
+    # auto_commit or gardener → lower authority
+    if metadata.get("auto_commit") or metadata.get("gardener"):
+        return 0.5
+
+    # User emphasis heuristics
+    if "important" in message_lower or "important:" in message_lower:
+        return 0.8
+    if "remember" in message_lower or "remember this" in message_lower:
+        return 0.7
+
+    # Default
+    return 0.5
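
Reviewer note on the new compute_suggested_importance helper: the heuristics are plain case-insensitive substring checks (the "important:" and "remember this" tests are redundant, since any message containing them already contains "important" / "remember"). A minimal sketch of the resulting behavior, illustrative only; it stubs repo and staged_files with None/{} because the current heuristics never read them:

# Illustrative sketch, not part of the package diff.
from memvcs.core.hooks import compute_suggested_importance

# User emphasis in the commit message raises the suggested score.
assert compute_suggested_importance(None, {}, "IMPORTANT: prefers dark mode") == 0.8
assert compute_suggested_importance(None, {}, "remember this preference") == 0.7

# Auto-generated commits stay at the neutral default, even with emphasis words.
assert compute_suggested_importance(None, {}, "important cleanup", {"auto_commit": True}) == 0.5

# No heuristic matches: neutral default.
assert compute_suggested_importance(None, {}, "routine update") == 0.5
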
memvcs/core/knowledge_graph.py CHANGED
@@ -13,6 +13,7 @@ from collections import defaultdict

 try:
     import networkx as nx
+
     NETWORKX_AVAILABLE = True
 except ImportError:
     NETWORKX_AVAILABLE = False
@@ -21,53 +22,56 @@ except ImportError:
 @dataclass
 class GraphNode:
     """A node in the knowledge graph (represents a memory file)."""
+
     id: str  # File path
     label: str  # Display name
     memory_type: str  # episodic, semantic, procedural
     size: int  # Content size
     tags: List[str] = field(default_factory=list)
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
-            'id': self.id,
-            'label': self.label,
-            'type': self.memory_type,
-            'size': self.size,
-            'tags': self.tags
+            "id": self.id,
+            "label": self.label,
+            "type": self.memory_type,
+            "size": self.size,
+            "tags": self.tags,
         }


 @dataclass
 class GraphEdge:
     """An edge in the knowledge graph (represents a connection)."""
+
     source: str
     target: str
     edge_type: str  # "reference", "similarity", "same_topic"
     weight: float = 1.0
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
-            'source': self.source,
-            'target': self.target,
-            'type': self.edge_type,
-            'weight': self.weight
+            "source": self.source,
+            "target": self.target,
+            "type": self.edge_type,
+            "weight": self.weight,
         }


 @dataclass
 class KnowledgeGraphData:
     """Complete graph data for export."""
+
     nodes: List[GraphNode]
     edges: List[GraphEdge]
     metadata: Dict[str, Any] = field(default_factory=dict)
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
-            'nodes': [n.to_dict() for n in self.nodes],
-            'edges': [e.to_dict() for e in self.edges],
-            'metadata': self.metadata
+            "nodes": [n.to_dict() for n in self.nodes],
+            "edges": [e.to_dict() for e in self.edges],
+            "metadata": self.metadata,
        }
-
+
     def to_json(self, indent: int = 2) -> str:
         return json.dumps(self.to_dict(), indent=indent)

@@ -75,106 +79,108 @@ class KnowledgeGraphData:
 class KnowledgeGraphBuilder:
     """
     Builds a knowledge graph from memory files.
-
+
     Detects connections through:
     1. Wikilinks: [[filename]] references
     2. Semantic similarity: Using embeddings
     3. Shared tags: Files with common tags
     """
-
+
     # Pattern for wikilinks: [[target]] or [[target|display text]]
-    WIKILINK_PATTERN = re.compile(r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]')
-
+    WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")
+
     def __init__(self, repo, vector_store=None):
         """
         Initialize the graph builder.
-
+
         Args:
             repo: Repository instance
             vector_store: Optional VectorStore for semantic similarity
         """
         self.repo = repo
         self.vector_store = vector_store
-        self.current_dir = repo.root / 'current'
-
+        self.current_dir = repo.root / "current"
+
         self._graph = None
         if NETWORKX_AVAILABLE:
             self._graph = nx.DiGraph()
-
+
     def _detect_memory_type(self, filepath: str) -> str:
         """Detect memory type from file path."""
         path_lower = filepath.lower()
-        if 'episodic' in path_lower:
-            return 'episodic'
-        elif 'semantic' in path_lower:
-            return 'semantic'
-        elif 'procedural' in path_lower:
-            return 'procedural'
-        elif 'checkpoint' in path_lower:
-            return 'checkpoints'
-        elif 'session-summar' in path_lower:
-            return 'session-summaries'
-        return 'unknown'
-
+        if "episodic" in path_lower:
+            return "episodic"
+        elif "semantic" in path_lower:
+            return "semantic"
+        elif "procedural" in path_lower:
+            return "procedural"
+        elif "checkpoint" in path_lower:
+            return "checkpoints"
+        elif "session-summar" in path_lower:
+            return "session-summaries"
+        return "unknown"
+
     def _extract_wikilinks(self, content: str) -> Set[str]:
         """Extract wikilink targets from content."""
         matches = self.WIKILINK_PATTERN.findall(content)
         return set(matches)
-
+
     def _extract_tags_from_frontmatter(self, content: str) -> List[str]:
         """Extract tags from YAML frontmatter."""
         try:
             import yaml
             from .schema import FrontmatterParser
-
+
             fm, _ = FrontmatterParser.parse(content)
             if fm and fm.tags:
                 return fm.tags
         except Exception:
             pass
         return []
-
+
     def _normalize_link_target(self, target: str, source_path: str) -> Optional[str]:
         """
         Normalize a wikilink target to a file path.
-
+
         Args:
             target: Wikilink target (e.g., "user-preferences")
             source_path: Path of the source file
-
+
         Returns:
             Normalized file path or None if not found
         """
         # Try exact match
-        for ext in ['.md', '.txt', '']:
+        for ext in [".md", ".txt", ""]:
             check_path = self.current_dir / (target + ext)
             if check_path.exists():
                 return str(check_path.relative_to(self.current_dir))
-
+
         # Try in same directory as source
         source_dir = Path(source_path).parent
-        for ext in ['.md', '.txt', '']:
+        for ext in [".md", ".txt", ""]:
             check_path = self.current_dir / source_dir / (target + ext)
             if check_path.exists():
                 return str(check_path.relative_to(self.current_dir))
-
+
         # Try in common directories
-        for subdir in ['semantic', 'episodic', 'procedural']:
-            for ext in ['.md', '.txt', '']:
+        for subdir in ["semantic", "episodic", "procedural"]:
+            for ext in [".md", ".txt", ""]:
                 check_path = self.current_dir / subdir / (target + ext)
                 if check_path.exists():
                     return str(check_path.relative_to(self.current_dir))
-
+
         return None
-
-    def build_graph(self, include_similarity: bool = True, similarity_threshold: float = 0.7) -> KnowledgeGraphData:
+
+    def build_graph(
+        self, include_similarity: bool = True, similarity_threshold: float = 0.7
+    ) -> KnowledgeGraphData:
         """
         Build the knowledge graph from memory files.
-
+
         Args:
             include_similarity: Include similarity-based edges
             similarity_threshold: Minimum similarity for edges (0-1)
-
+
         Returns:
             KnowledgeGraphData with nodes and edges
         """
@@ -183,42 +189,42 @@ class KnowledgeGraphBuilder:
         file_paths = []
         file_contents = {}
         file_tags = defaultdict(list)
-
+
         # Collect all memory files
         if not self.current_dir.exists():
             return KnowledgeGraphData(nodes=[], edges=[])
-
-        for memory_file in self.current_dir.glob('**/*.md'):
+
+        for memory_file in self.current_dir.glob("**/*.md"):
             try:
                 rel_path = str(memory_file.relative_to(self.current_dir))
                 content = memory_file.read_text()
-
+
                 # Create node
                 memory_type = self._detect_memory_type(rel_path)
                 tags = self._extract_tags_from_frontmatter(content)
-
+
                 node = GraphNode(
                     id=rel_path,
                     label=memory_file.stem,
                     memory_type=memory_type,
                     size=len(content),
-                    tags=tags
+                    tags=tags,
                 )
                 nodes.append(node)
                 file_paths.append(rel_path)
                 file_contents[rel_path] = content
-
+
                 # Index tags
                 for tag in tags:
                     file_tags[tag].append(rel_path)
-
+
                 # Add to NetworkX graph if available
                 if self._graph is not None:
                     self._graph.add_node(rel_path, **node.to_dict())
-
+
             except Exception:
                 continue
-
+
         # Add wikilink edges
         for source_path, content in file_contents.items():
             links = self._extract_wikilinks(content)
@@ -226,62 +232,62 @@ class KnowledgeGraphBuilder:
                 target_path = self._normalize_link_target(target, source_path)
                 if target_path and target_path in file_contents:
                     edge = GraphEdge(
-                        source=source_path,
-                        target=target_path,
-                        edge_type='reference',
-                        weight=1.0
+                        source=source_path, target=target_path, edge_type="reference", weight=1.0
                     )
                     edges.append(edge)
-
+
                     if self._graph is not None:
-                        self._graph.add_edge(source_path, target_path, type='reference', weight=1.0)
-
+                        self._graph.add_edge(source_path, target_path, type="reference", weight=1.0)
+
         # Add tag-based edges
         for tag, files in file_tags.items():
             if len(files) > 1:
                 for i, file1 in enumerate(files):
-                    for file2 in files[i+1:]:
+                    for file2 in files[i + 1 :]:
                         edge = GraphEdge(
-                            source=file1,
-                            target=file2,
-                            edge_type='same_topic',
-                            weight=0.5
+                            source=file1, target=file2, edge_type="same_topic", weight=0.5
                         )
                         edges.append(edge)
-
+
                         if self._graph is not None:
-                            self._graph.add_edge(file1, file2, type='same_topic', weight=0.5)
-
+                            self._graph.add_edge(file1, file2, type="same_topic", weight=0.5)
+
         # Add similarity edges
         if include_similarity and self.vector_store and len(file_paths) > 1:
             try:
-                edges.extend(self._build_similarity_edges(file_paths, file_contents, similarity_threshold))
+                edges.extend(
+                    self._build_similarity_edges(file_paths, file_contents, similarity_threshold)
+                )
             except Exception:
                 pass  # Skip similarity if vector store fails
-
+
         # Build metadata
         metadata = {
-            'total_nodes': len(nodes),
-            'total_edges': len(edges),
-            'memory_types': {
-                'episodic': sum(1 for n in nodes if n.memory_type == 'episodic'),
-                'semantic': sum(1 for n in nodes if n.memory_type == 'semantic'),
-                'procedural': sum(1 for n in nodes if n.memory_type == 'procedural'),
-                'other': sum(1 for n in nodes if n.memory_type not in ['episodic', 'semantic', 'procedural']),
+            "total_nodes": len(nodes),
+            "total_edges": len(edges),
+            "memory_types": {
+                "episodic": sum(1 for n in nodes if n.memory_type == "episodic"),
+                "semantic": sum(1 for n in nodes if n.memory_type == "semantic"),
+                "procedural": sum(1 for n in nodes if n.memory_type == "procedural"),
+                "other": sum(
+                    1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
+                ),
+            },
+            "edge_types": {
+                "reference": sum(1 for e in edges if e.edge_type == "reference"),
+                "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
+                "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
             },
-            'edge_types': {
-                'reference': sum(1 for e in edges if e.edge_type == 'reference'),
-                'similarity': sum(1 for e in edges if e.edge_type == 'similarity'),
-                'same_topic': sum(1 for e in edges if e.edge_type == 'same_topic'),
-            }
         }
-
+
         return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
-
-    def _build_similarity_edges(self, file_paths: List[str], file_contents: Dict[str, str], threshold: float) -> List[GraphEdge]:
+
+    def _build_similarity_edges(
+        self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
+    ) -> List[GraphEdge]:
         """Build edges based on semantic similarity."""
         edges = []
-
+
         # Get embeddings for all files
         embeddings = {}
         for path, content in file_contents.items():
@@ -292,90 +298,80 @@ class KnowledgeGraphBuilder:
                 embeddings[path] = emb
             except Exception:
                 continue
-
+
         # Compute pairwise similarities
         import math
-
+
         def cosine_similarity(a: List[float], b: List[float]) -> float:
-            dot = sum(x*y for x, y in zip(a, b))
-            norm_a = math.sqrt(sum(x*x for x in a))
-            norm_b = math.sqrt(sum(x*x for x in b))
+            dot = sum(x * y for x, y in zip(a, b))
+            norm_a = math.sqrt(sum(x * x for x in a))
+            norm_b = math.sqrt(sum(x * x for x in b))
             if norm_a == 0 or norm_b == 0:
                 return 0
             return dot / (norm_a * norm_b)
-
+
         paths_list = list(embeddings.keys())
         for i, path1 in enumerate(paths_list):
-            for path2 in paths_list[i+1:]:
+            for path2 in paths_list[i + 1 :]:
                 sim = cosine_similarity(embeddings[path1], embeddings[path2])
                 if sim >= threshold:
-                    edge = GraphEdge(
-                        source=path1,
-                        target=path2,
-                        edge_type='similarity',
-                        weight=sim
-                    )
+                    edge = GraphEdge(source=path1, target=path2, edge_type="similarity", weight=sim)
                     edges.append(edge)
-
+
                     if self._graph is not None:
-                        self._graph.add_edge(path1, path2, type='similarity', weight=sim)
-
+                        self._graph.add_edge(path1, path2, type="similarity", weight=sim)
+
         return edges
-
+
     def find_isolated_nodes(self) -> List[str]:
         """Find nodes with no connections (knowledge islands)."""
         if self._graph is None or len(self._graph) == 0:
             return []
-
+
         # Convert to undirected for analysis
         undirected = self._graph.to_undirected()
         return [node for node in undirected.nodes() if undirected.degree(node) == 0]
-
+
     def find_potential_contradictions(self) -> List[Tuple[str, str, float]]:
         """
         Find files that might have contradictory information.
-
+
         Returns files in the same topic cluster with low similarity.
         """
         if self._graph is None:
             return []
-
+
         contradictions = []
-
+
         # Files connected by same_topic but with low similarity
         for u, v, data in self._graph.edges(data=True):
-            if data.get('type') == 'same_topic':
+            if data.get("type") == "same_topic":
                 # Check if there's also a similarity edge
                 sim_edge = self._graph.get_edge_data(u, v)
-                if sim_edge and sim_edge.get('type') == 'similarity':
-                    if sim_edge.get('weight', 1.0) < 0.3:
-                        contradictions.append((u, v, sim_edge.get('weight', 0)))
-
+                if sim_edge and sim_edge.get("type") == "similarity":
+                    if sim_edge.get("weight", 1.0) < 0.3:
+                        contradictions.append((u, v, sim_edge.get("weight", 0)))
+
         return contradictions
-
+
     def export_for_d3(self) -> str:
         """Export graph in D3.js force-graph format."""
         graph_data = self.build_graph()
-
+
         d3_format = {
-            'nodes': [
+            "nodes": [
                 {
-                    'id': n.id,
-                    'name': n.label,
-                    'group': n.memory_type,
-                    'size': min(20, max(5, n.size // 100))
+                    "id": n.id,
+                    "name": n.label,
+                    "group": n.memory_type,
+                    "size": min(20, max(5, n.size // 100)),
                 }
                 for n in graph_data.nodes
             ],
-            'links': [
-                {
-                    'source': e.source,
-                    'target': e.target,
-                    'type': e.edge_type,
-                    'value': e.weight
-                }
+            "links": [
+                {"source": e.source, "target": e.target, "type": e.edge_type, "value": e.weight}
                 for e in graph_data.edges
-            ]
+            ],
         }
-
+
         return json.dumps(d3_format, indent=2)
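
Reviewer note: a minimal usage sketch of the builder as reformatted above, illustrative only. The SimpleNamespace stub stands in for a real Repository, since the builder only reads repo.root here; the path is hypothetical:

# Illustrative sketch, not part of the package diff.
from pathlib import Path
from types import SimpleNamespace

from memvcs.core.knowledge_graph import KnowledgeGraphBuilder

repo = SimpleNamespace(root=Path("/path/to/memory/repo"))  # stub: only .root is used
builder = KnowledgeGraphBuilder(repo, vector_store=None)   # no similarity edges without a store

# Wikilink extraction feeding the "reference" edges:
links = builder._extract_wikilinks("see [[user-preferences|prefs]] and [[api-keys]]")
# -> {"user-preferences", "api-keys"}

graph = builder.build_graph(include_similarity=False)
print(graph.metadata["total_nodes"], graph.metadata["total_edges"])
print(builder.find_isolated_nodes())  # knowledge islands ([] unless networkx is installed)
print(builder.export_for_d3()[:200])  # D3 force-graph JSON: {"nodes": [...], "links": [...]}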