cortexcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortexcode/__init__.py +3 -0
- cortexcode/analysis.py +331 -0
- cortexcode/cli.py +845 -0
- cortexcode/context.py +298 -0
- cortexcode/dashboard.py +152 -0
- cortexcode/docs.py +1266 -0
- cortexcode/git_diff.py +157 -0
- cortexcode/indexer.py +1860 -0
- cortexcode/lsp_server.py +315 -0
- cortexcode/mcp_server.py +455 -0
- cortexcode/plugins.py +188 -0
- cortexcode/semantic_search.py +237 -0
- cortexcode/vuln_scan.py +241 -0
- cortexcode/watcher.py +122 -0
- cortexcode/workspace.py +180 -0
- cortexcode-0.1.0.dist-info/METADATA +448 -0
- cortexcode-0.1.0.dist-info/RECORD +21 -0
- cortexcode-0.1.0.dist-info/WHEEL +5 -0
- cortexcode-0.1.0.dist-info/entry_points.txt +2 -0
- cortexcode-0.1.0.dist-info/licenses/LICENSE +21 -0
- cortexcode-0.1.0.dist-info/top_level.txt +1 -0
cortexcode/context.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Context Provider - Get relevant context for AI assistants."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
# Try to use tiktoken for accurate token counting
|
|
9
|
+
_tiktoken_encoder = None
|
|
10
|
+
try:
|
|
11
|
+
import tiktoken
|
|
12
|
+
_tiktoken_encoder = tiktoken.encoding_for_model("gpt-4")
|
|
13
|
+
except ImportError:
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def estimate_tokens(text: str) -> int:
    """Estimate token count. Uses tiktoken if available, else ~4 chars/token heuristic."""
    if _tiktoken_encoder is None:
        # Fallback heuristic: roughly four characters per token, minimum of one.
        return max(1, len(text) // 4)
    return len(_tiktoken_encoder.encode(text))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def estimate_file_tokens(file_path: Path) -> int:
    """Estimate tokens for an entire file, returning 0 if it cannot be read."""
    try:
        text = file_path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Missing/unreadable files contribute nothing to the total.
        return 0
    return estimate_tokens(text)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def calculate_token_savings(index_path: Path, query: str | None = None, num_results: int = 5) -> dict[str, Any]:
    """Calculate how many tokens CortexCode saves vs reading raw files.

    Args:
        index_path: Path to the index.json file.
        query: Optional search query forwarded to get_context().
        num_results: Number of context results to include.

    Returns:
        Dictionary with token counts for raw files vs indexed context
    """
    # Read the index once and reuse the text for both parsing and sizing.
    index_text = index_path.read_text(encoding="utf-8")
    index = json.loads(index_text)
    root = Path(index.get("project_root", "."))

    # Sum token estimates over every indexed file that still exists on disk.
    total_raw_tokens = 0
    file_count = 0
    for rel_path in index.get("files", {}):
        candidate = root / rel_path
        if candidate.exists():
            file_count += 1
            total_raw_tokens += estimate_file_tokens(candidate)

    # Size of the serialized index itself.
    index_tokens = estimate_tokens(index_text)

    # Size of the context payload an AI assistant would actually receive.
    context_payload = json.dumps(get_context(index_path, query, num_results), indent=2)
    context_tokens = estimate_tokens(context_payload)

    saved = total_raw_tokens - context_tokens
    percent = (saved / total_raw_tokens * 100) if total_raw_tokens > 0 else 0
    ratio = round(total_raw_tokens / context_tokens, 1) if context_tokens > 0 else 0

    return {
        "raw_project_tokens": total_raw_tokens,
        "index_tokens": index_tokens,
        "context_tokens": context_tokens,
        "savings_tokens": saved,
        "savings_percent": round(percent, 1),
        "file_count": file_count,
        "compression_ratio": ratio,
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_context(index_path: Path, query: str | None = None, num_results: int = 5) -> dict[str, Any]:
    """Get relevant context from the index for AI assistants.

    Args:
        index_path: Path to the index.json file
        query: Optional search query. Supports:
            - symbol name: "handleAuth"
            - file-scoped: "auth.ts:handleAuth"
            - fuzzy: "hndlAuth"
        num_results: Number of results to return

    Returns:
        Dictionary with relevant symbols and their relationships
    """
    index = json.loads(index_path.read_text(encoding="utf-8"))

    files = index.get("files", {})
    call_graph = index.get("call_graph", {})
    file_deps = index.get("file_dependencies", {})
    # NOTE: removed unused local `project_root` — it was read but never used.

    # No query: return a capped listing of all known symbols.
    if not query:
        return _get_all_symbols(files, call_graph, num_results)

    # Parse file-scoped query (e.g. "auth.ts:handleAuth")
    file_filter = None
    query_lower = query.lower()
    if ":" in query and not query.startswith(":"):
        parts = query.split(":", 1)
        file_filter = parts[0].lower()
        query_lower = parts[1].lower() if parts[1] else ""

    results = []

    for rel_path, file_data in files.items():
        # Apply file filter (substring match on the relative path).
        if file_filter and file_filter not in rel_path.lower():
            continue

        # file_data may be a dict ({"symbols": ..., "imports": ...}) or a bare
        # list of symbols (legacy index layout).
        symbols = file_data.get("symbols", []) if isinstance(file_data, dict) else file_data
        imports = file_data.get("imports", []) if isinstance(file_data, dict) else []

        for sym in symbols:
            # If file-scoped with no symbol query, return all symbols in that file
            if file_filter and not query_lower:
                result = _build_symbol_result(sym, rel_path, call_graph)
                results.append(result)
                continue

            if _matches_query(sym, query_lower):
                result = _build_symbol_result(sym, rel_path, call_graph)
                results.append(result)

        # Imports are matched by module name, only when a symbol query exists.
        if query_lower:
            for imp in imports:
                if query_lower in imp.get("module", "").lower():
                    results.append({
                        "name": imp.get("module"),
                        "type": "import",
                        "file": rel_path,
                        "imported": imp.get("imported", []),
                    })

    results = _rank_results(results, call_graph, query_lower)

    response = {
        "query": query,
        "symbols": results[:num_results],
        "total_found": len(results),
    }

    # Add file dependency info if file-scoped (first matching file only).
    if file_filter:
        for rel_path in files:
            if file_filter in rel_path.lower():
                deps = file_deps.get(rel_path, [])
                if deps:
                    response["file_dependencies"] = deps
                break

    return response
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _build_symbol_result(sym: dict, rel_path: str, call_graph: dict) -> dict:
|
|
159
|
+
"""Build a context result dict for a symbol."""
|
|
160
|
+
result = {
|
|
161
|
+
"name": sym.get("name"),
|
|
162
|
+
"type": sym.get("type"),
|
|
163
|
+
"file": rel_path,
|
|
164
|
+
"line": sym.get("line"),
|
|
165
|
+
"params": sym.get("params", []),
|
|
166
|
+
"calls": sym.get("calls", []),
|
|
167
|
+
"class": sym.get("class"),
|
|
168
|
+
"framework": sym.get("framework"),
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
if sym.get("return_type"):
|
|
172
|
+
result["return_type"] = sym["return_type"]
|
|
173
|
+
|
|
174
|
+
if sym.get("methods"):
|
|
175
|
+
result["methods"] = [m.get("name") for m in sym["methods"]]
|
|
176
|
+
|
|
177
|
+
callers = [name for name, calls in call_graph.items() if sym.get("name") in calls]
|
|
178
|
+
if callers:
|
|
179
|
+
result["called_by"] = callers[:5]
|
|
180
|
+
|
|
181
|
+
return result
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _get_all_symbols(files: dict, call_graph: dict, limit: int) -> dict[str, Any]:
|
|
185
|
+
"""Get all symbols, limited."""
|
|
186
|
+
all_symbols = []
|
|
187
|
+
|
|
188
|
+
for rel_path, file_data in files.items():
|
|
189
|
+
symbols = file_data.get("symbols", []) if isinstance(file_data, dict) else file_data
|
|
190
|
+
for sym in symbols:
|
|
191
|
+
all_symbols.append({
|
|
192
|
+
"name": sym.get("name"),
|
|
193
|
+
"type": sym.get("type"),
|
|
194
|
+
"file": rel_path,
|
|
195
|
+
"line": sym.get("line"),
|
|
196
|
+
"params": sym.get("params", []),
|
|
197
|
+
"calls": sym.get("calls", []),
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
return {
|
|
201
|
+
"symbols": all_symbols[:limit],
|
|
202
|
+
"total_found": len(all_symbols),
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _matches_query(symbol: dict, query: str) -> bool:
|
|
207
|
+
"""Check if symbol matches the query (supports fuzzy and file-scoped)."""
|
|
208
|
+
name = symbol.get("name", "").lower()
|
|
209
|
+
|
|
210
|
+
# Exact or substring match on name
|
|
211
|
+
if query in name:
|
|
212
|
+
return True
|
|
213
|
+
|
|
214
|
+
# Fuzzy: all query chars appear in order in name
|
|
215
|
+
if len(query) >= 3 and _fuzzy_match(query, name):
|
|
216
|
+
return True
|
|
217
|
+
|
|
218
|
+
# Match on calls
|
|
219
|
+
calls = symbol.get("calls", [])
|
|
220
|
+
for call in calls:
|
|
221
|
+
if query in call.lower():
|
|
222
|
+
return True
|
|
223
|
+
|
|
224
|
+
# Match on class
|
|
225
|
+
symbol_class = symbol.get("class")
|
|
226
|
+
if symbol_class and query in symbol_class.lower():
|
|
227
|
+
return True
|
|
228
|
+
|
|
229
|
+
# Match on params
|
|
230
|
+
params = symbol.get("params", [])
|
|
231
|
+
for param in params:
|
|
232
|
+
if query in param.lower():
|
|
233
|
+
return True
|
|
234
|
+
|
|
235
|
+
return False
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _fuzzy_match(query: str, target: str) -> bool:
|
|
239
|
+
"""Check if all characters in query appear in order in target."""
|
|
240
|
+
qi = 0
|
|
241
|
+
for ch in target:
|
|
242
|
+
if qi < len(query) and ch == query[qi]:
|
|
243
|
+
qi += 1
|
|
244
|
+
return qi == len(query)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _rank_results(results: list[dict], call_graph: dict, query: str) -> list[dict]:
|
|
248
|
+
"""Rank results by relevance."""
|
|
249
|
+
def relevance_score(result: dict) -> int:
|
|
250
|
+
score = 0
|
|
251
|
+
name = result.get("name", "").lower()
|
|
252
|
+
|
|
253
|
+
if name == query:
|
|
254
|
+
score += 100
|
|
255
|
+
elif name.startswith(query):
|
|
256
|
+
score += 50
|
|
257
|
+
elif query in name:
|
|
258
|
+
score += 10
|
|
259
|
+
|
|
260
|
+
# Boost symbols that have callers (more connected = more important)
|
|
261
|
+
if result.get("called_by"):
|
|
262
|
+
score += len(result["called_by"]) * 5
|
|
263
|
+
|
|
264
|
+
# Boost symbols that make calls (entry points)
|
|
265
|
+
if result.get("calls"):
|
|
266
|
+
score += min(len(result["calls"]), 5) * 2
|
|
267
|
+
|
|
268
|
+
# Boost functions/classes over imports
|
|
269
|
+
sym_type = result.get("type", "")
|
|
270
|
+
if sym_type == "class":
|
|
271
|
+
score += 15
|
|
272
|
+
elif sym_type in ("function", "method"):
|
|
273
|
+
score += 10
|
|
274
|
+
elif sym_type == "interface":
|
|
275
|
+
score += 5
|
|
276
|
+
|
|
277
|
+
return score
|
|
278
|
+
|
|
279
|
+
return sorted(results, key=relevance_score, reverse=True)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def format_context_for_ai(result: dict) -> str:
    """Format context as a text block suitable for pasting into AI chat.

    Args:
        result: Output of get_context() — a dict with a "symbols" list.

    Returns:
        Markdown text with one heading section per symbol.
    """
    lines = ["## Relevant Code Context\n"]

    for sym in result.get("symbols", []):
        # Use .get() consistently (the original indexed sym['name'] directly,
        # which raised KeyError on a malformed entry while every other field
        # already fell back gracefully).
        lines.append(f"### {sym.get('name', 'unknown')} ({sym.get('type', 'unknown')})")
        lines.append(f"**File:** `{sym.get('file', 'unknown')}:{sym.get('line', '?')}`")

        if sym.get("params"):
            lines.append(f"**Params:** {', '.join(sym['params'])}")

        if sym.get("calls"):
            lines.append(f"**Calls:** {', '.join(sym['calls'])}")

        # Blank line between symbol sections.
        lines.append("")

    return "\n".join(lines)
|
cortexcode/dashboard.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Live dashboard server with auto-refresh on file changes."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import http.server
|
|
5
|
+
import threading
|
|
6
|
+
import hashlib
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DashboardServer:
    """HTTP server that serves the HTML report and auto-regenerates on index changes.

    A daemon thread polls the index file's hash; when it changes, the HTML docs
    are regenerated and an injected page script makes open browser tabs reload.
    """

    def __init__(self, project_path: Path, port: int = 8787):
        """Set up paths and state; nothing runs until start() is called.

        Args:
            project_path: Root of the indexed project.
            port: TCP port to serve the dashboard on.
        """
        self.project_path = project_path.resolve()
        self.port = port
        self.docs_dir = self.project_path / ".cortexcode" / "docs"
        self.index_path = self.project_path / ".cortexcode" / "index.json"
        self._last_hash: str = ""
        self._running = False
        self._server: http.server.HTTPServer | None = None
        self._poll_interval = 2.0  # seconds between change-detection polls

    def _get_index_hash(self) -> str:
        """Get hash of the index file for change detection.

        MD5 is used purely to detect content changes, not for security.
        Returns "" when the index file is missing or unreadable.
        """
        try:
            return hashlib.md5(self.index_path.read_bytes()).hexdigest()
        except OSError:
            return ""

    def _regenerate_docs(self) -> bool:
        """Regenerate HTML docs from the current index. Returns True on success."""
        try:
            # Imported lazily so this module loads even if docs generation breaks.
            from cortexcode.docs import generate_all_docs
            generate_all_docs(self.index_path, self.docs_dir)
            return True
        except Exception as e:
            # Best-effort: report the failure but keep the server alive.
            print(f"Failed to regenerate docs: {e}")
            return False

    def _ensure_docs(self) -> bool:
        """Ensure docs exist, generating if needed."""
        html_file = self.docs_dir / "index.html"
        if not html_file.exists():
            return self._regenerate_docs()
        return True

    def _inject_auto_refresh(self) -> None:
        """Inject auto-refresh script into the HTML report."""
        html_file = self.docs_dir / "index.html"
        if not html_file.exists():
            return

        content = html_file.read_text(encoding="utf-8")

        # Don't inject twice
        if "cortexcode-auto-refresh" in content:
            return

        # The injected page script polls /__cortexcode_hash and reloads the
        # tab whenever the reported hash changes.
        refresh_script = f"""
<script id="cortexcode-auto-refresh">
(function() {{
    let lastHash = '';
    async function checkForUpdates() {{
        try {{
            const resp = await fetch('/__cortexcode_hash');
            const hash = await resp.text();
            if (lastHash && hash !== lastHash) {{
                console.log('Index changed, reloading...');
                location.reload();
            }}
            lastHash = hash;
        }} catch(e) {{}}
    }}
    setInterval(checkForUpdates, {int(self._poll_interval * 1000)});
    checkForUpdates();
}})();
</script>
"""
        content = content.replace("</body>", f"{refresh_script}</body>")
        html_file.write_text(content, encoding="utf-8")

    def _poll_for_changes(self) -> None:
        """Background thread that watches for index changes and regenerates docs."""
        while self._running:
            time.sleep(self._poll_interval)
            current_hash = self._get_index_hash()
            if current_hash and current_hash != self._last_hash:
                self._last_hash = current_hash
                self._regenerate_docs()
                # Regeneration rewrote index.html, so re-inject the script.
                self._inject_auto_refresh()

    def start(self, open_browser: bool = True) -> None:
        """Start the dashboard server (blocks until interrupted).

        Args:
            open_browser: When True, open the dashboard in the default browser.

        Raises:
            FileNotFoundError: if the project has not been indexed yet.
        """
        if not self.index_path.exists():
            raise FileNotFoundError(f"No index found at {self.index_path}. Run `cortexcode index` first.")

        self._ensure_docs()
        self._last_hash = self._get_index_hash()
        self._inject_auto_refresh()

        docs_dir = str(self.docs_dir)
        # (Removed dead local `current_hash_ref`: it was assigned but never read.)

        # Closure reference so the nested handler can reach server state.
        server_self = self

        class Handler(http.server.SimpleHTTPRequestHandler):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, directory=docs_dir, **kwargs)

            def do_GET(self):
                # Lightweight endpoint the injected page script polls.
                if self.path == "/__cortexcode_hash":
                    h = server_self._get_index_hash()
                    self.send_response(200)
                    self.send_header("Content-Type", "text/plain")
                    self.send_header("Access-Control-Allow-Origin", "*")
                    self.end_headers()
                    self.wfile.write(h.encode())
                    return
                return super().do_GET()

            def log_message(self, format, *args):
                pass  # Suppress logs

        # NOTE(review): binds on all interfaces; consider "127.0.0.1" if the
        # dashboard is meant to be local-only.
        self._server = http.server.HTTPServer(("0.0.0.0", self.port), Handler)
        self._running = True

        # Start polling thread
        poll_thread = threading.Thread(target=self._poll_for_changes, daemon=True)
        poll_thread.start()

        # Open browser
        if open_browser:
            import webbrowser
            threading.Timer(0.5, lambda: webbrowser.open(f"http://localhost:{self.port}")).start()

        try:
            self._server.serve_forever()
        except KeyboardInterrupt:
            pass
        finally:
            self._running = False
            if self._server:
                self._server.shutdown()

    def stop(self) -> None:
        """Stop the dashboard server."""
        self._running = False
        if self._server:
            self._server.shutdown()
|