codegraph-cli 2.0.0__py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- codegraph_cli/__init__.py +4 -0
- codegraph_cli/agents.py +191 -0
- codegraph_cli/bug_detector.py +386 -0
- codegraph_cli/chat_agent.py +352 -0
- codegraph_cli/chat_session.py +220 -0
- codegraph_cli/cli.py +330 -0
- codegraph_cli/cli_chat.py +367 -0
- codegraph_cli/cli_diagnose.py +133 -0
- codegraph_cli/cli_refactor.py +230 -0
- codegraph_cli/cli_setup.py +470 -0
- codegraph_cli/cli_test.py +177 -0
- codegraph_cli/cli_v2.py +267 -0
- codegraph_cli/codegen_agent.py +265 -0
- codegraph_cli/config.py +31 -0
- codegraph_cli/config_manager.py +341 -0
- codegraph_cli/context_manager.py +500 -0
- codegraph_cli/crew_agents.py +123 -0
- codegraph_cli/crew_chat.py +159 -0
- codegraph_cli/crew_tools.py +497 -0
- codegraph_cli/diff_engine.py +265 -0
- codegraph_cli/embeddings.py +241 -0
- codegraph_cli/graph_export.py +144 -0
- codegraph_cli/llm.py +642 -0
- codegraph_cli/models.py +47 -0
- codegraph_cli/models_v2.py +185 -0
- codegraph_cli/orchestrator.py +49 -0
- codegraph_cli/parser.py +800 -0
- codegraph_cli/performance_analyzer.py +223 -0
- codegraph_cli/project_context.py +230 -0
- codegraph_cli/rag.py +200 -0
- codegraph_cli/refactor_agent.py +452 -0
- codegraph_cli/security_scanner.py +366 -0
- codegraph_cli/storage.py +390 -0
- codegraph_cli/templates/graph_interactive.html +257 -0
- codegraph_cli/testgen_agent.py +316 -0
- codegraph_cli/validation_engine.py +285 -0
- codegraph_cli/vector_store.py +293 -0
- codegraph_cli-2.0.0.dist-info/METADATA +318 -0
- codegraph_cli-2.0.0.dist-info/RECORD +43 -0
- codegraph_cli-2.0.0.dist-info/WHEEL +5 -0
- codegraph_cli-2.0.0.dist-info/entry_points.txt +2 -0
- codegraph_cli-2.0.0.dist-info/licenses/LICENSE +21 -0
- codegraph_cli-2.0.0.dist-info/top_level.txt +1 -0

codegraph_cli/diff_engine.py
@@ -0,0 +1,265 @@
"""DiffEngine for previewing and applying code changes."""

from __future__ import annotations

import difflib
import shutil
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional

from .models_v2 import ApplyResult, CodeProposal, FileChange


class DiffEngine:
    """Handles previewing and applying code changes safely."""

    def __init__(self, backup_dir: Optional[Path] = None):
        """Initialize DiffEngine.

        Args:
            backup_dir: Directory to store backups. Defaults to ~/.codegraph/backups/
        """
        self.backup_dir = backup_dir or Path.home() / ".codegraph" / "backups"
        self.backup_dir.mkdir(parents=True, exist_ok=True)

    def create_diff(self, original: str, modified: str, filename: str = "file") -> str:
        """Create unified diff between two versions.

        Args:
            original: Original content
            modified: Modified content
            filename: Name of file for diff header

        Returns:
            Unified diff string
        """
        original_lines = original.splitlines(keepends=True)
        modified_lines = modified.splitlines(keepends=True)

        diff = difflib.unified_diff(
            original_lines,
            modified_lines,
            fromfile=f"a/{filename}",
            tofile=f"b/{filename}",
            lineterm=""
        )

        return "".join(diff)

    def preview_changes(self, proposal: CodeProposal) -> str:
        """Generate preview of all changes in a proposal.

        Args:
            proposal: Code proposal to preview

        Returns:
            Formatted preview string
        """
        lines = []
        lines.append(f"📝 Proposed Changes: {proposal.description}")
        lines.append(f" ID: {proposal.id}")
        lines.append("")

        # Summary
        if proposal.num_files_created > 0:
            lines.append(f" [NEW] {proposal.num_files_created} file(s)")
        if proposal.num_files_modified > 0:
            lines.append(f" [MODIFY] {proposal.num_files_modified} file(s)")
        if proposal.num_files_deleted > 0:
            lines.append(f" [DELETE] {proposal.num_files_deleted} file(s)")
        lines.append("")

        # Detailed changes
        for change in proposal.changes:
            lines.append(f"{'='*60}")
            lines.append(f"[{change.change_type.upper()}] {change.file_path}")
            lines.append(f"{'='*60}")

            if change.change_type == "create":
                lines.append(change.new_content or "")
            elif change.change_type == "delete":
                lines.append("File will be deleted")
            elif change.change_type == "modify":
                if change.diff:
                    lines.append(change.diff)
                else:
                    # Generate diff if not provided
                    diff = self.create_diff(
                        change.original_content or "",
                        change.new_content or "",
                        change.file_path
                    )
                    lines.append(diff)
            lines.append("")

        # Impact summary
        if proposal.impact_summary:
            lines.append(f"{'='*60}")
            lines.append("📊 Impact Analysis")
            lines.append(f"{'='*60}")
            lines.append(proposal.impact_summary)

        return "\n".join(lines)

    def apply_changes(
        self,
        proposal: CodeProposal,
        backup: bool = True,
        dry_run: bool = False
    ) -> ApplyResult:
        """Apply changes from a proposal to the filesystem.

        Args:
            proposal: Code proposal to apply
            backup: Whether to create backups before applying
            dry_run: If True, don't actually apply changes

        Returns:
            ApplyResult with success status and details
        """
        if dry_run:
            return ApplyResult(
                success=True,
                files_changed=[c.file_path for c in proposal.changes],
                backup_id=None
            )

        backup_id = None
        if backup:
            backup_id = self._create_backup(proposal)

        files_changed = []

        try:
            for change in proposal.changes:
                file_path = Path(change.file_path)

                if change.change_type == "create":
                    # Create new file
                    file_path.parent.mkdir(parents=True, exist_ok=True)
                    file_path.write_text(change.new_content or "")
                    files_changed.append(str(file_path))

                elif change.change_type == "modify":
                    # Modify existing file
                    if not file_path.exists():
                        raise FileNotFoundError(f"File not found: {file_path}")
                    file_path.write_text(change.new_content or "")
                    files_changed.append(str(file_path))

                elif change.change_type == "delete":
                    # Delete file
                    if file_path.exists():
                        file_path.unlink()
                        files_changed.append(str(file_path))

            return ApplyResult(
                success=True,
                files_changed=files_changed,
                backup_id=backup_id
            )

        except Exception as e:
            # Rollback if backup exists
            if backup_id:
                self.rollback(backup_id)

            return ApplyResult(
                success=False,
                files_changed=[],
                error=str(e)
            )

    def _create_backup(self, proposal: CodeProposal) -> str:
        """Create backup of files before applying changes.

        Args:
            proposal: Proposal containing files to backup

        Returns:
            Backup ID for rollback
        """
        backup_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
        backup_path = self.backup_dir / backup_id
        backup_path.mkdir(parents=True, exist_ok=True)

        # Save metadata
        metadata = {
            "proposal_id": proposal.id,
            "description": proposal.description,
            "timestamp": datetime.now().isoformat(),
            "files": []
        }

        # Backup each file
        for change in proposal.changes:
            file_path = Path(change.file_path)

            if change.change_type in ["modify", "delete"] and file_path.exists():
                # Copy original file to backup
                backup_file = backup_path / file_path.name
                shutil.copy2(file_path, backup_file)
                metadata["files"].append({
                    "original": str(file_path),
                    "backup": str(backup_file),
                    "change_type": change.change_type
                })

        # Save metadata
        import json
        (backup_path / "metadata.json").write_text(json.dumps(metadata, indent=2))

        return backup_id

    def rollback(self, backup_id: str) -> bool:
        """Rollback changes using a backup.

        Args:
            backup_id: ID of backup to restore

        Returns:
            True if successful, False otherwise
        """
        backup_path = self.backup_dir / backup_id

        if not backup_path.exists():
            return False

        try:
            # Load metadata
            import json
            metadata = json.loads((backup_path / "metadata.json").read_text())

            # Restore each file
            for file_info in metadata["files"]:
                original_path = Path(file_info["original"])
                backup_file = Path(file_info["backup"])

                if backup_file.exists():
                    shutil.copy2(backup_file, original_path)

            return True

        except Exception:
            return False

    def list_backups(self) -> list[dict]:
        """List all available backups.

        Returns:
            List of backup metadata
        """
        backups = []

        for backup_dir in self.backup_dir.iterdir():
            if backup_dir.is_dir():
                metadata_file = backup_dir / "metadata.json"
                if metadata_file.exists():
                    import json
                    metadata = json.loads(metadata_file.read_text())
                    metadata["backup_id"] = backup_dir.name
                    backups.append(metadata)

        return sorted(backups, key=lambda x: x["timestamp"], reverse=True)
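
A caller's flow through DiffEngine is preview, then apply, then (if needed) rollback. The sketch below is illustrative only: the CodeProposal and FileChange constructors live in models_v2.py, which is not shown in this diff, so their keyword arguments here are assumptions inferred from the attributes DiffEngine reads.

# Illustrative usage sketch (not part of the package). CodeProposal/FileChange
# field names are assumed from the attributes DiffEngine reads; see models_v2.py.
from codegraph_cli.diff_engine import DiffEngine
from codegraph_cli.models_v2 import CodeProposal, FileChange

engine = DiffEngine()  # backups land in ~/.codegraph/backups by default

proposal = CodeProposal(
    id="demo-1",
    description="Rename greeting",
    changes=[
        FileChange(
            change_type="modify",
            file_path="hello.py",
            original_content='print("hi")\n',
            new_content='print("hello")\n',
        )
    ],
)

print(engine.preview_changes(proposal))       # unified diff, summary, impact
engine.apply_changes(proposal, dry_run=True)  # report affected files, write nothing
result = engine.apply_changes(proposal)       # backup first; auto-rollback on error
if result.success:
    print("backup id:", result.backup_id)     # pass to engine.rollback() to undo

Note that apply_changes already rolls back automatically when a write fails mid-proposal, so an explicit rollback(backup_id) is only needed to undo a proposal that applied successfully. One caveat visible in _create_backup: backups are keyed by basename (file_path.name), so two changed files that share a basename in different directories would overwrite each other inside the backup folder.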

codegraph_cli/embeddings.py
@@ -0,0 +1,241 @@
"""Neural embedding engine using Sentence Transformers for semantic code understanding.

Local-first architecture:
- Models are downloaded once and cached in ``~/.codegraph/models``.
- All inference runs on-device (CPU or GPU). No data is ever sent to
  external APIs.

Falls back to a lightweight deterministic hash-embedding when
``sentence-transformers`` is not installed.
"""

from __future__ import annotations

import logging
import math
import os
import re
from hashlib import blake2b
from pathlib import Path
from typing import Iterable, List, Optional, Union

from .config import BASE_DIR

logger = logging.getLogger(__name__)

# Default local model cache directory
MODEL_CACHE_DIR: Path = BASE_DIR / "models"

# Preferred models in priority order
PREFERRED_MODELS: List[str] = [
    "all-MiniLM-L6-v2",
    "nomic-ai/nomic-embed-text-v1.5",
]

_TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")


# ===================================================================
# NeuralEmbedder (Primary – Sentence Transformers)
# ===================================================================

class NeuralEmbedder:
    """Semantic embedding engine powered by Sentence Transformers.

    The model is downloaded on first use and cached in
    ``~/.codegraph/models`` so that subsequent runs are fully offline.
    All computation is local – **no data leaves the machine**.

    Example::

        embedder = NeuralEmbedder()
        vecs = embedder.embed_documents(["def hello(): ...", "class Foo: ..."])
    """

    def __init__(
        self,
        model_name: str = "all-MiniLM-L6-v2",
        cache_dir: Optional[Path] = None,
        device: str = "cpu",
    ) -> None:
        self.model_name = model_name
        self.cache_dir = cache_dir or MODEL_CACHE_DIR
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.device = device
        self._model: object = None  # lazy-loaded SentenceTransformer
        self._dim: Optional[int] = None

    # ------------------------------------------------------------------
    # Lazy model loading
    # ------------------------------------------------------------------

    def _load_model(self) -> None:
        if self._model is not None:
            return

        try:
            from sentence_transformers import SentenceTransformer  # type: ignore[import-untyped]
        except ImportError:
            raise ImportError(
                "sentence-transformers is not installed. "
                "Install with: pip install sentence-transformers"
            )

        # Tell sentence-transformers where to cache
        os.environ.setdefault(
            "SENTENCE_TRANSFORMERS_HOME", str(self.cache_dir),
        )

        try:
            self._model = SentenceTransformer(
                self.model_name,
                cache_folder=str(self.cache_dir),
                device=self.device,
            )
            self._dim = self._model.get_sentence_embedding_dimension()  # type: ignore[union-attr]
            logger.info(
                "Loaded model '%s' (dim=%d) on %s",
                self.model_name, self._dim, self.device,
            )
        except Exception as exc:
            raise RuntimeError(
                f"Failed to load embedding model '{self.model_name}': {exc}"
            ) from exc

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    @property
    def dim(self) -> int:
        """Dimensionality of the embedding vectors."""
        if self._dim is None:
            self._load_model()
        assert self._dim is not None
        return self._dim

    def embed_text(self, text: str) -> List[float]:
        """Embed a single text string and return a unit-norm vector."""
        self._load_model()
        assert self._model is not None
        embedding = self._model.encode(  # type: ignore[union-attr]
            text,
            convert_to_numpy=True,
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return embedding.tolist()

    def embed_documents(
        self,
        texts: List[str],
        batch_size: int = 32,
    ) -> List[List[float]]:
        """Embed multiple documents with batching for efficiency.

        Args:
            texts: List of text strings to embed.
            batch_size: Number of texts per forward pass.

        Returns:
            List of embedding vectors (each normalised to unit length).
        """
        if not texts:
            return []
        self._load_model()
        assert self._model is not None
        embeddings = self._model.encode(  # type: ignore[union-attr]
            texts,
            batch_size=batch_size,
            convert_to_numpy=True,
            normalize_embeddings=True,
            show_progress_bar=len(texts) > 100,
        )
        return embeddings.tolist()

    # Backward-compat alias used by legacy callers
    def embed_many(self, texts: Iterable[str]) -> List[List[float]]:
        """Alias for :meth:`embed_documents`."""
        return self.embed_documents(list(texts))


# ===================================================================
# HashEmbeddingModel (Lightweight Fallback)
# ===================================================================

class HashEmbeddingModel:
    """Deterministic token-hashing embedder – no ML dependencies.

    Provides basic keyword-level similarity. Automatically used as a
    fallback when ``sentence-transformers`` is not available.
    """

    def __init__(self, dim: int = 256) -> None:
        self.dim = dim

    def embed_text(self, text: str) -> List[float]:
        vec = [0.0] * self.dim
        tokens = _TOKEN_RE.findall(text.lower())
        if not tokens:
            return vec
        for token in tokens:
            digest = blake2b(token.encode("utf-8"), digest_size=8).digest()
            idx = int.from_bytes(digest[:4], "big") % self.dim
            sign = 1.0 if (digest[4] & 1) == 0 else -1.0
            vec[idx] += sign
        return _l2_normalize(vec)

    def embed_many(self, texts: Iterable[str]) -> List[List[float]]:
        return [self.embed_text(text) for text in texts]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Alias matching the NeuralEmbedder interface."""
        return self.embed_many(texts)


# ===================================================================
# Factory
# ===================================================================

def get_embedder(
    model_name: Optional[str] = None,
    cache_dir: Optional[Path] = None,
    device: str = "cpu",
) -> Union[NeuralEmbedder, HashEmbeddingModel]:
    """Return the best available embedder.

    * If ``sentence-transformers`` is installed → :class:`NeuralEmbedder`.
    * Otherwise → :class:`HashEmbeddingModel` (zero-dependency fallback).
    """
    try:
        import sentence_transformers  # noqa: F401
        return NeuralEmbedder(
            model_name=model_name or "all-MiniLM-L6-v2",
            cache_dir=cache_dir,
            device=device,
        )
    except ImportError:
        logger.warning(
            "sentence-transformers not installed – "
            "using hash-based embeddings (no semantic understanding). "
            "Install with: pip install sentence-transformers"
        )
        return HashEmbeddingModel()


# ===================================================================
# Utility
# ===================================================================

def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float:
    """Cosine similarity between two L2-normalised vectors."""
    if not vec_a or not vec_b or len(vec_a) != len(vec_b):
        return 0.0
    return sum(a * b for a, b in zip(vec_a, vec_b))


def _l2_normalize(vec: List[float]) -> List[float]:
    norm = math.sqrt(sum(v * v for v in vec))
    if norm == 0:
        return vec
    return [v / norm for v in vec]
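
HashEmbeddingModel is the classic signed feature-hashing trick: each identifier token is hashed to a bucket with a ±1 sign, so texts with similar keyword profiles land near each other even without a neural model. Since both embedders return unit-norm vectors, cosine_similarity reduces to a plain dot product. A small usage sketch of the public surface (the printed scores are illustrative, not measured):

# Usage sketch: the factory degrades gracefully when sentence-transformers
# is missing, so calling code never needs to branch on the backend.
from codegraph_cli.embeddings import cosine_similarity, get_embedder

embedder = get_embedder()  # NeuralEmbedder if available, else HashEmbeddingModel

query = embedder.embed_text("function that loads configuration")
candidates = [
    "def load_config(path): ...",
    "class HttpServer: ...",
]
vectors = embedder.embed_documents(candidates)

# Vectors are L2-normalised, so cosine similarity is just a dot product.
ranked = sorted(
    zip(candidates, (cosine_similarity(query, v) for v in vectors)),
    key=lambda pair: pair[1],
    reverse=True,
)
for text, score in ranked:
    print(f"{score:.3f}  {text}")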

codegraph_cli/graph_export.py
@@ -0,0 +1,144 @@
"""Graph export helpers for DOT and simple standalone HTML outputs."""

from __future__ import annotations

import html
import json
from pathlib import Path
from typing import Dict, List

from .storage import GraphStore


def export_dot(store: GraphStore, output_file: Path, focus: str = "") -> None:
    nodes = {row["node_id"]: row for row in store.get_nodes()}
    edges = [dict(e) for e in store.get_edges()]

    selected = _focused_subgraph(nodes, edges, focus)

    lines = ["digraph CodeGraph {"]
    lines.append("  rankdir=LR;")

    for node_id in selected["nodes"]:
        if node_id not in nodes:
            continue
        node = nodes[node_id]
        label = f"{node['node_type']}\\n{node['qualname']}"
        lines.append(f'  "{node_id}" [label="{_esc(label)}"];')

    for edge in selected["edges"]:
        if edge["src"] not in nodes or edge["dst"] not in nodes:
            continue
        lines.append(
            f'  "{edge["src"]}" -> "{edge["dst"]}" [label="{_esc(edge["edge_type"])}"];'
        )

    lines.append("}")
    output_file.write_text("\n".join(lines), encoding="utf-8")


def export_html(store: GraphStore, output_file: Path, focus: str = "") -> None:
    """Export graph to interactive HTML visualization using vis.js."""
    nodes = {row["node_id"]: row for row in store.get_nodes()}
    edges = [dict(e) for e in store.get_edges()]

    selected = _focused_subgraph(nodes, edges, focus)
    graph_payload = {
        "nodes": [
            {
                "id": node_id,
                "label": f"{nodes[node_id]['node_type']}: {nodes[node_id]['qualname']}",
                "title": nodes[node_id]["file_path"],
            }
            for node_id in selected["nodes"]
            if node_id in nodes
        ],
        "edges": [e for e in selected["edges"] if e["src"] in nodes and e["dst"] in nodes],
    }

    # Load interactive template
    template_path = Path(__file__).parent / "templates" / "graph_interactive.html"

    if template_path.exists():
        template = template_path.read_text(encoding="utf-8")
        # Inject graph data
        doc = template.replace("{{ GRAPH_DATA }}", json.dumps(graph_payload, indent=2))
    else:
        # Fallback to basic HTML if template not found
        doc = _basic_html_export(graph_payload)

    output_file.write_text(doc, encoding="utf-8")


def _basic_html_export(graph_payload: dict) -> str:
    """Fallback basic HTML export."""
    return f"""<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<title>CodeGraph Export</title>
<style>
  body {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; margin: 20px; }}
  #container {{ display: grid; grid-template-columns: 1fr 1fr; gap: 18px; }}
  .panel {{ border: 1px solid #ddd; border-radius: 8px; padding: 10px; }}
  ul {{ list-style: none; padding: 0; margin: 0; }}
  li {{ margin: 4px 0; }}
</style>
</head>
<body>
<h1>CodeGraph Export</h1>
<div id="container">
  <div class="panel">
    <h2>Nodes</h2>
    <ul id="nodes"></ul>
  </div>
  <div class="panel">
    <h2>Edges</h2>
    <ul id="edges"></ul>
  </div>
</div>
<script>
  const graph = {json.dumps(graph_payload)};
  const nodesEl = document.getElementById('nodes');
  const edgesEl = document.getElementById('edges');
  graph.nodes.forEach(n => {{
    const li = document.createElement('li');
    li.textContent = `${{n.id}} -> ${{n.label}} (${{n.title}})`;
    nodesEl.appendChild(li);
  }});
  graph.edges.forEach(e => {{
    const li = document.createElement('li');
    li.textContent = `${{e.src}} --${{e.edge_type}}--> ${{e.dst}}`;
    edgesEl.appendChild(li);
  }});
</script>
</body>
</html>
"""


def _focused_subgraph(nodes: Dict[str, dict], edges: List[dict], focus: str) -> Dict[str, List]:
    if not focus:
        return {"nodes": list(nodes.keys()), "edges": edges}

    focus_ids = {
        node_id
        for node_id, node in nodes.items()
        if focus in node_id or focus in node["name"] or focus in node["qualname"]
    }

    if not focus_ids:
        return {"nodes": list(nodes.keys()), "edges": edges}

    edge_subset = [e for e in edges if e["src"] in focus_ids or e["dst"] in focus_ids]
    node_subset = set(focus_ids)
    for e in edge_subset:
        if e["src"] in nodes:
            node_subset.add(e["src"])
        if e["dst"] in nodes:
            node_subset.add(e["dst"])
    return {"nodes": sorted(node_subset), "edges": edge_subset}


def _esc(text: str) -> str:
    return text.replace('"', '\\"')
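
Both exporters pull the full node/edge set from GraphStore and optionally narrow it with _focused_subgraph, which keeps every node whose id, name, or qualname contains the focus substring, plus its one-hop neighbours. A hedged usage sketch follows; GraphStore is defined in storage.py (not shown in this diff), so obtaining a populated store is left as a placeholder rather than an invented constructor call.

# Usage sketch -- how to construct/populate GraphStore lives in storage.py
# (not shown here), so the store below is a hypothetical placeholder.
from pathlib import Path

from codegraph_cli.graph_export import export_dot, export_html
from codegraph_cli.storage import GraphStore

store: GraphStore = ...  # hypothetical: an already-indexed graph store

# Whole graph as Graphviz DOT; render with: dot -Tsvg graph.dot -o graph.svg
export_dot(store, Path("graph.dot"))

# Subgraph around anything matching "parser" (falls back to the full
# graph when nothing matches), written as a standalone HTML page.
export_html(store, Path("graph.html"), focus="parser")

If the bundled templates/graph_interactive.html is missing, export_html falls back to the static two-panel node/edge listing produced by _basic_html_export.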