agmem 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/METADATA +20 -3
- agmem-0.1.2.dist-info/RECORD +86 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +35 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +77 -76
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +4 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +81 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +74 -0
- memvcs/commands/fsck.py +55 -61
- memvcs/commands/garden.py +28 -37
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +16 -28
- memvcs/commands/pack.py +129 -0
- memvcs/commands/pull.py +4 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +59 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/gardener.py +164 -132
- memvcs/core/hooks.py +48 -14
- memvcs/core/knowledge_graph.py +134 -138
- memvcs/core/merge.py +248 -171
- memvcs/core/objects.py +95 -96
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/refs.py +132 -115
- memvcs/core/repository.py +174 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +112 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/vector_store.py +41 -35
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.2.dist-info}/top_level.txt +0 -0
memvcs/core/hooks.py
CHANGED
@@ -16,15 +16,16 @@ PII_SEVERITY_HIGH = "high"
 @dataclass
 class HookResult:
     """Result of running hooks."""
+
     success: bool
     errors: List[str] = field(default_factory=list)
     warnings: List[str] = field(default_factory=list)
-
+
     def add_error(self, message: str):
         """Add an error and mark as failed."""
         self.errors.append(message)
         self.success = False
-
+
     def add_warning(self, message: str):
         """Add a warning (doesn't affect success)."""
         self.warnings.append(message)
@@ -39,6 +40,7 @@ def _pii_staged_files_to_scan(repo, staged_files: Dict[str, Any]) -> Dict[str, Any]:
     """Return staged files to scan for PII (excludes allowlisted paths)."""
     try:
         from .config_loader import load_agmem_config, pii_enabled, pii_allowlist
+
         config = load_agmem_config(getattr(repo, "root", None))
     except ImportError:
         return staged_files
@@ -58,6 +60,7 @@ def _run_pii_hook(repo, staged_files: Dict[str, Any], result: HookResult) -> None:
     """Run PII scanner on staged files; high severity → error, else → warning."""
     try:
         from .pii_scanner import PIIScanner
+
         to_scan = _pii_staged_files_to_scan(repo, staged_files)
         pii_result = PIIScanner.scan_staged_files(repo, to_scan)
         if not pii_result.has_issues:
@@ -77,11 +80,11 @@ def _run_pii_hook(repo, staged_files: Dict[str, Any], result: HookResult) -> None:
 def run_pre_commit_hooks(repo, staged_files: Dict[str, Any]) -> HookResult:
     """
     Run all pre-commit hooks on staged files.
-
+
     Args:
         repo: Repository instance
         staged_files: Dict of staged files with their info
-
+
     Returns:
         HookResult with success status and any errors/warnings
     """
@@ -93,42 +96,42 @@ def run_pre_commit_hooks(repo, staged_files: Dict[str, Any]) -> HookResult:
             result.add_error(error)
         for warning in file_type_result.warnings:
             result.add_warning(warning)
-
+
     return result


 def validate_file_types(repo, staged_files: Dict[str, Any]) -> HookResult:
     """
     Validate that staged files are allowed types.
-
+
     Args:
         repo: Repository instance
         staged_files: Dict of staged files
-
+
     Returns:
         HookResult with validation status
     """
     result = HookResult(success=True)
-
+
     # Get config for allowed extensions
     config = repo.get_config()
-    allowed_extensions = config.get(
-        'allowed_extensions', ['.md', '.txt', '.json', '.yaml', '.yml'])
+    allowed_extensions = config.get("allowed_extensions", [".md", ".txt", ".json", ".yaml", ".yml"])
+
     for filepath in staged_files.keys():
         path = Path(filepath)
         ext = path.suffix.lower()
-
+
         # Skip files without extensions (might be valid)
         if not ext:
             continue
-
+
         # Check if extension is allowed
         if ext not in allowed_extensions:
             result.add_warning(
                 f"File '{filepath}' has extension '{ext}' which may not be optimal for memory storage. "
                 f"Recommended: {', '.join(allowed_extensions)}"
             )
-
+
     return result


@@ -139,7 +142,7 @@ _registered_hooks: List[Callable] = []
 def register_hook(hook_fn: Callable):
     """
     Register a custom pre-commit hook.
-
+
     Args:
         hook_fn: Function that takes (repo, staged_files) and returns HookResult
     """
@@ -149,3 +152,34 @@ def register_hook(hook_fn: Callable):
 def get_registered_hooks() -> List[Callable]:
     """Get all registered hooks."""
     return _registered_hooks.copy()
+
+
+def compute_suggested_importance(
+    repo: Any,
+    staged_files: Dict[str, Any],
+    message: str,
+    metadata: Optional[Dict[str, Any]] = None,
+) -> float:
+    """
+    Compute suggested importance score from heuristics.
+
+    Scoring factors: user emphasis in message, source authority (auto_commit), etc.
+
+    Returns:
+        Float 0.0-1.0; default 0.5 if no heuristics match.
+    """
+    metadata = metadata or {}
+    message_lower = message.lower()
+
+    # auto_commit or gardener → lower authority
+    if metadata.get("auto_commit") or metadata.get("gardener"):
+        return 0.5
+
+    # User emphasis heuristics
+    if "important" in message_lower or "important:" in message_lower:
+        return 0.8
+    if "remember" in message_lower or "remember this" in message_lower:
+        return 0.7
+
+    # Default
+    return 0.5
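The new compute_suggested_importance helper and the existing register_hook extension point compose naturally: a caller can register custom checks and separately score a commit message. A minimal sketch of both, assuming only the contract shown in the diff (a hook takes (repo, staged_files) and returns a HookResult); the "size" key on the staged-file info and the 64 KB cap are hypothetical illustrations, not agmem defaults:

from memvcs.core.hooks import HookResult, register_hook, compute_suggested_importance

MAX_MEMORY_BYTES = 64 * 1024  # hypothetical per-file cap, not an agmem default

def warn_on_oversized_memories(repo, staged_files):
    """Custom hook: warn about staged memory files above an arbitrary size cap."""
    result = HookResult(success=True)
    for filepath, info in staged_files.items():
        # Assumes the staged-file info dict carries a "size" entry; adjust to the real shape.
        if isinstance(info, dict) and info.get("size", 0) > MAX_MEMORY_BYTES:
            result.add_warning(f"'{filepath}' exceeds {MAX_MEMORY_BYTES} bytes")
    return result

register_hook(warn_on_oversized_memories)

# Importance heuristics from the diff: emphasis in the message raises the score,
# while automated sources (auto_commit, gardener) always get the 0.5 default.
assert compute_suggested_importance(None, {}, "IMPORTANT: rotate credentials") == 0.8
assert compute_suggested_importance(None, {}, "remember the API quirk") == 0.7
assert compute_suggested_importance(None, {}, "important", {"auto_commit": True}) == 0.5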
memvcs/core/knowledge_graph.py
CHANGED
@@ -13,6 +13,7 @@ from collections import defaultdict

 try:
     import networkx as nx
+
     NETWORKX_AVAILABLE = True
 except ImportError:
     NETWORKX_AVAILABLE = False
@@ -21,53 +22,56 @@ except ImportError:
 @dataclass
 class GraphNode:
     """A node in the knowledge graph (represents a memory file)."""
+
     id: str  # File path
     label: str  # Display name
     memory_type: str  # episodic, semantic, procedural
     size: int  # Content size
     tags: List[str] = field(default_factory=list)
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
-            'id': self.id,
-            'label': self.label,
-            'type': self.memory_type,
-            'size': self.size,
-            'tags': self.tags
+            "id": self.id,
+            "label": self.label,
+            "type": self.memory_type,
+            "size": self.size,
+            "tags": self.tags,
         }


 @dataclass
 class GraphEdge:
     """An edge in the knowledge graph (represents a connection)."""
+
     source: str
     target: str
     edge_type: str  # "reference", "similarity", "same_topic"
     weight: float = 1.0
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
-            'source': self.source,
-            'target': self.target,
-            'type': self.edge_type,
-            'weight': self.weight
+            "source": self.source,
+            "target": self.target,
+            "type": self.edge_type,
+            "weight": self.weight,
         }


 @dataclass
 class KnowledgeGraphData:
     """Complete graph data for export."""
+
     nodes: List[GraphNode]
     edges: List[GraphEdge]
     metadata: Dict[str, Any] = field(default_factory=dict)
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
-            'nodes': [n.to_dict() for n in self.nodes],
-            'edges': [e.to_dict() for e in self.edges],
-            'metadata': self.metadata
+            "nodes": [n.to_dict() for n in self.nodes],
+            "edges": [e.to_dict() for e in self.edges],
+            "metadata": self.metadata,
         }
-
+
     def to_json(self, indent: int = 2) -> str:
         return json.dumps(self.to_dict(), indent=indent)

@@ -75,106 +79,108 @@ class KnowledgeGraphData:
 class KnowledgeGraphBuilder:
     """
     Builds a knowledge graph from memory files.
-
+
     Detects connections through:
     1. Wikilinks: [[filename]] references
     2. Semantic similarity: Using embeddings
     3. Shared tags: Files with common tags
     """
-
+
     # Pattern for wikilinks: [[target]] or [[target|display text]]
-    WIKILINK_PATTERN = re.compile(r'\[\[([^\]|]+)(?:\|[^\]]+)?\]\]')
-
+    WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")
+
     def __init__(self, repo, vector_store=None):
         """
         Initialize the graph builder.
-
+
         Args:
             repo: Repository instance
             vector_store: Optional VectorStore for semantic similarity
         """
         self.repo = repo
         self.vector_store = vector_store
-        self.current_dir = repo.root / 'current'
-
+        self.current_dir = repo.root / "current"
+
         self._graph = None
         if NETWORKX_AVAILABLE:
             self._graph = nx.DiGraph()
-
+
     def _detect_memory_type(self, filepath: str) -> str:
         """Detect memory type from file path."""
         path_lower = filepath.lower()
-        if 'episodic' in path_lower:
-            return 'episodic'
-        elif 'semantic' in path_lower:
-            return 'semantic'
-        elif 'procedural' in path_lower:
-            return 'procedural'
-        elif 'checkpoint' in path_lower:
-            return 'checkpoints'
-        elif 'session-summar' in path_lower:
-            return 'session-summaries'
-        return 'unknown'
-
+        if "episodic" in path_lower:
+            return "episodic"
+        elif "semantic" in path_lower:
+            return "semantic"
+        elif "procedural" in path_lower:
+            return "procedural"
+        elif "checkpoint" in path_lower:
+            return "checkpoints"
+        elif "session-summar" in path_lower:
+            return "session-summaries"
+        return "unknown"
+
     def _extract_wikilinks(self, content: str) -> Set[str]:
         """Extract wikilink targets from content."""
         matches = self.WIKILINK_PATTERN.findall(content)
         return set(matches)
-
+
     def _extract_tags_from_frontmatter(self, content: str) -> List[str]:
         """Extract tags from YAML frontmatter."""
         try:
             import yaml
             from .schema import FrontmatterParser
-
+
             fm, _ = FrontmatterParser.parse(content)
             if fm and fm.tags:
                 return fm.tags
         except Exception:
             pass
         return []
-
+
     def _normalize_link_target(self, target: str, source_path: str) -> Optional[str]:
         """
         Normalize a wikilink target to a file path.
-
+
         Args:
             target: Wikilink target (e.g., "user-preferences")
             source_path: Path of the source file
-
+
         Returns:
             Normalized file path or None if not found
         """
         # Try exact match
-        for ext in ['.md', '.txt', '']:
+        for ext in [".md", ".txt", ""]:
             check_path = self.current_dir / (target + ext)
             if check_path.exists():
                 return str(check_path.relative_to(self.current_dir))
-
+
         # Try in same directory as source
         source_dir = Path(source_path).parent
-        for ext in ['.md', '.txt', '']:
+        for ext in [".md", ".txt", ""]:
             check_path = self.current_dir / source_dir / (target + ext)
             if check_path.exists():
                 return str(check_path.relative_to(self.current_dir))
-
+
         # Try in common directories
-        for subdir in ['semantic', 'episodic', 'procedural']:
-            for ext in ['.md', '.txt', '']:
+        for subdir in ["semantic", "episodic", "procedural"]:
+            for ext in [".md", ".txt", ""]:
                 check_path = self.current_dir / subdir / (target + ext)
                 if check_path.exists():
                     return str(check_path.relative_to(self.current_dir))
-
+
         return None
-
-    def build_graph(self, include_similarity: bool = True, similarity_threshold: float = 0.7) -> KnowledgeGraphData:
+
+    def build_graph(
+        self, include_similarity: bool = True, similarity_threshold: float = 0.7
+    ) -> KnowledgeGraphData:
         """
         Build the knowledge graph from memory files.
-
+
         Args:
             include_similarity: Include similarity-based edges
             similarity_threshold: Minimum similarity for edges (0-1)
-
+
         Returns:
             KnowledgeGraphData with nodes and edges
         """
@@ -183,42 +189,42 @@ class KnowledgeGraphBuilder:
         file_paths = []
         file_contents = {}
         file_tags = defaultdict(list)
-
+
         # Collect all memory files
         if not self.current_dir.exists():
             return KnowledgeGraphData(nodes=[], edges=[])
-
-        for memory_file in self.current_dir.glob('**/*.md'):
+
+        for memory_file in self.current_dir.glob("**/*.md"):
             try:
                 rel_path = str(memory_file.relative_to(self.current_dir))
                 content = memory_file.read_text()
-
+
                 # Create node
                 memory_type = self._detect_memory_type(rel_path)
                 tags = self._extract_tags_from_frontmatter(content)
-
+
                 node = GraphNode(
                     id=rel_path,
                     label=memory_file.stem,
                     memory_type=memory_type,
                     size=len(content),
-                    tags=tags
+                    tags=tags,
                 )
                 nodes.append(node)
                 file_paths.append(rel_path)
                 file_contents[rel_path] = content
-
+
                 # Index tags
                 for tag in tags:
                     file_tags[tag].append(rel_path)
-
+
                 # Add to NetworkX graph if available
                 if self._graph is not None:
                     self._graph.add_node(rel_path, **node.to_dict())
-
+
             except Exception:
                 continue
-
+
         # Add wikilink edges
         for source_path, content in file_contents.items():
             links = self._extract_wikilinks(content)
@@ -226,62 +232,62 @@ class KnowledgeGraphBuilder:
                 target_path = self._normalize_link_target(target, source_path)
                 if target_path and target_path in file_contents:
                     edge = GraphEdge(
-                        source=source_path,
-                        target=target_path,
-                        edge_type='reference',
-                        weight=1.0
+                        source=source_path, target=target_path, edge_type="reference", weight=1.0
                     )
                     edges.append(edge)
-
+
                     if self._graph is not None:
-                        self._graph.add_edge(source_path, target_path, type='reference', weight=1.0)
-
+                        self._graph.add_edge(source_path, target_path, type="reference", weight=1.0)
+
         # Add tag-based edges
         for tag, files in file_tags.items():
             if len(files) > 1:
                 for i, file1 in enumerate(files):
-                    for file2 in files[i+1:]:
+                    for file2 in files[i + 1 :]:
                         edge = GraphEdge(
-                            source=file1,
-                            target=file2,
-                            edge_type='same_topic',
-                            weight=0.5
+                            source=file1, target=file2, edge_type="same_topic", weight=0.5
                         )
                         edges.append(edge)
-
+
                         if self._graph is not None:
-                            self._graph.add_edge(file1, file2, type='same_topic', weight=0.5)
-
+                            self._graph.add_edge(file1, file2, type="same_topic", weight=0.5)
+
         # Add similarity edges
         if include_similarity and self.vector_store and len(file_paths) > 1:
             try:
-                edges.extend(self._build_similarity_edges(file_paths, file_contents, similarity_threshold))
+                edges.extend(
+                    self._build_similarity_edges(file_paths, file_contents, similarity_threshold)
+                )
             except Exception:
                 pass  # Skip similarity if vector store fails
-
+
         # Build metadata
         metadata = {
-            'total_nodes': len(nodes),
-            'total_edges': len(edges),
-            'memory_types': {
-                'episodic': sum(1 for n in nodes if n.memory_type == 'episodic'),
-                'semantic': sum(1 for n in nodes if n.memory_type == 'semantic'),
-                'procedural': sum(1 for n in nodes if n.memory_type == 'procedural'),
-                'other': sum(1 for n in nodes if n.memory_type not in ['episodic', 'semantic', 'procedural'])
+            "total_nodes": len(nodes),
+            "total_edges": len(edges),
+            "memory_types": {
+                "episodic": sum(1 for n in nodes if n.memory_type == "episodic"),
+                "semantic": sum(1 for n in nodes if n.memory_type == "semantic"),
+                "procedural": sum(1 for n in nodes if n.memory_type == "procedural"),
+                "other": sum(
+                    1 for n in nodes if n.memory_type not in ["episodic", "semantic", "procedural"]
+                ),
+            },
+            "edge_types": {
+                "reference": sum(1 for e in edges if e.edge_type == "reference"),
+                "similarity": sum(1 for e in edges if e.edge_type == "similarity"),
+                "same_topic": sum(1 for e in edges if e.edge_type == "same_topic"),
             },
-            'edge_types': {
-                'reference': sum(1 for e in edges if e.edge_type == 'reference'),
-                'similarity': sum(1 for e in edges if e.edge_type == 'similarity'),
-                'same_topic': sum(1 for e in edges if e.edge_type == 'same_topic'),
-            }
         }
-
+
         return KnowledgeGraphData(nodes=nodes, edges=edges, metadata=metadata)
-
-    def _build_similarity_edges(self, file_paths: List[str], file_contents: Dict[str, str], threshold: float) -> List[GraphEdge]:
+
+    def _build_similarity_edges(
+        self, file_paths: List[str], file_contents: Dict[str, str], threshold: float
+    ) -> List[GraphEdge]:
         """Build edges based on semantic similarity."""
         edges = []
-
+
         # Get embeddings for all files
         embeddings = {}
         for path, content in file_contents.items():
@@ -292,90 +298,80 @@ class KnowledgeGraphBuilder:
                 embeddings[path] = emb
             except Exception:
                 continue
-
+
         # Compute pairwise similarities
         import math
-
+
         def cosine_similarity(a: List[float], b: List[float]) -> float:
-            dot = sum(x*y for x, y in zip(a, b))
-            norm_a = math.sqrt(sum(x*x for x in a))
-            norm_b = math.sqrt(sum(x*x for x in b))
+            dot = sum(x * y for x, y in zip(a, b))
+            norm_a = math.sqrt(sum(x * x for x in a))
+            norm_b = math.sqrt(sum(x * x for x in b))
             if norm_a == 0 or norm_b == 0:
                 return 0
             return dot / (norm_a * norm_b)
-
+
         paths_list = list(embeddings.keys())
         for i, path1 in enumerate(paths_list):
-            for path2 in paths_list[i+1:]:
+            for path2 in paths_list[i + 1 :]:
                 sim = cosine_similarity(embeddings[path1], embeddings[path2])
                 if sim >= threshold:
-                    edge = GraphEdge(
-                        source=path1,
-                        target=path2,
-                        edge_type='similarity',
-                        weight=sim
-                    )
+                    edge = GraphEdge(source=path1, target=path2, edge_type="similarity", weight=sim)
                     edges.append(edge)
-
+
                     if self._graph is not None:
-                        self._graph.add_edge(path1, path2, type='similarity', weight=sim)
-
+                        self._graph.add_edge(path1, path2, type="similarity", weight=sim)
+
         return edges
-
+
     def find_isolated_nodes(self) -> List[str]:
         """Find nodes with no connections (knowledge islands)."""
         if self._graph is None or len(self._graph) == 0:
             return []
-
+
         # Convert to undirected for analysis
         undirected = self._graph.to_undirected()
         return [node for node in undirected.nodes() if undirected.degree(node) == 0]
-
+
     def find_potential_contradictions(self) -> List[Tuple[str, str, float]]:
         """
         Find files that might have contradictory information.
-
+
         Returns files in the same topic cluster with low similarity.
         """
         if self._graph is None:
             return []
-
+
         contradictions = []
-
+
         # Files connected by same_topic but with low similarity
         for u, v, data in self._graph.edges(data=True):
-            if data.get('type') == 'same_topic':
+            if data.get("type") == "same_topic":
                 # Check if there's also a similarity edge
                 sim_edge = self._graph.get_edge_data(u, v)
-                if sim_edge and sim_edge.get('type') == 'similarity':
-                    if sim_edge.get('weight', 1.0) < 0.3:
-                        contradictions.append((u, v, sim_edge.get('weight', 0)))
-
+                if sim_edge and sim_edge.get("type") == "similarity":
+                    if sim_edge.get("weight", 1.0) < 0.3:
+                        contradictions.append((u, v, sim_edge.get("weight", 0)))
+
         return contradictions
-
+
     def export_for_d3(self) -> str:
         """Export graph in D3.js force-graph format."""
         graph_data = self.build_graph()
-
+
         d3_format = {
-            'nodes': [
+            "nodes": [
                 {
-                    'id': n.id,
-                    'name': n.label,
-                    'group': n.memory_type,
-                    'size': min(20, max(5, n.size // 100))
+                    "id": n.id,
+                    "name": n.label,
+                    "group": n.memory_type,
+                    "size": min(20, max(5, n.size // 100)),
                 }
                 for n in graph_data.nodes
             ],
-            'links': [
-                {
-                    'source': e.source,
-                    'target': e.target,
-                    'type': e.edge_type,
-                    'value': e.weight
-                }
+            "links": [
+                {"source": e.source, "target": e.target, "type": e.edge_type, "value": e.weight}
                 for e in graph_data.edges
-            ]
+            ],
         }
-
+
         return json.dumps(d3_format, indent=2)
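Taken together, the reformatted KnowledgeGraphBuilder is driven in three steps: construct it around a repository, build the graph, then query it. A minimal sketch under the assumptions visible in the diff (the repo object only needs a root Path whose current/ directory holds .md memory files; SimpleNamespace stands in for a real Repository, and the ~/.agmem path is hypothetical):

from pathlib import Path
from types import SimpleNamespace

from memvcs.core.knowledge_graph import KnowledgeGraphBuilder

repo = SimpleNamespace(root=Path("~/.agmem").expanduser())  # hypothetical repo root
builder = KnowledgeGraphBuilder(repo)  # no vector_store, so similarity edges are skipped

graph = builder.build_graph()
print(graph.metadata["total_nodes"], graph.metadata["edge_types"]["reference"])

# The wikilink pattern captures only the target, even in [[target|display]] form
# (_extract_wikilinks is the internal helper that applies it).
links = builder._extract_wikilinks("See [[user-preferences|prefs]] and [[api-notes]].")
assert links == {"user-preferences", "api-notes"}

# Knowledge islands: nodes with no reference, tag, or similarity edges (needs networkx).
print(builder.find_isolated_nodes())

The D3 export path (export_for_d3) reuses the same build_graph pass and emits the nodes/links shape shown at the end of the diff.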