cortexcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortexcode/context.py ADDED
@@ -0,0 +1,298 @@
1
+ """Context Provider - Get relevant context for AI assistants."""
2
+
3
+ import json
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
# Optional dependency: tiktoken gives exact GPT-4 token counts when present.
_tiktoken_encoder = None
try:
    import tiktoken
    _tiktoken_encoder = tiktoken.encoding_for_model("gpt-4")
except ImportError:
    pass


def estimate_tokens(text: str) -> int:
    """Estimate the token count of *text*.

    Uses the tiktoken GPT-4 encoder when the library is installed;
    otherwise falls back to a rough ~4-characters-per-token heuristic.
    Always returns at least 1.
    """
    encoder = _tiktoken_encoder
    if encoder is None:
        return max(1, len(text) // 4)
    return len(encoder.encode(text))
22
+
23
+
24
def estimate_file_tokens(file_path: Path) -> int:
    """Estimate the token count for the whole file at *file_path*.

    Returns 0 when the file cannot be read (missing, permissions, etc.);
    undecodable bytes are dropped rather than raising.
    """
    try:
        text = file_path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return 0
    return estimate_tokens(text)
31
+
32
+
33
def calculate_token_savings(index_path: Path, query: str | None = None, num_results: int = 5) -> dict[str, Any]:
    """Calculate how many tokens CortexCode saves vs reading raw files.

    Args:
        index_path: Path to the index.json file.
        query: Optional search query forwarded to :func:`get_context`.
        num_results: Number of context results to include.

    Returns:
        Dictionary with token counts for raw files vs indexed context,
        plus savings and compression-ratio summaries.
    """
    # Read the index once and reuse the text for both JSON parsing and
    # token estimation (previously the file was read from disk twice).
    index_text = index_path.read_text(encoding="utf-8")
    index = json.loads(index_text)
    files = index.get("files", {})
    project_root = Path(index.get("project_root", "."))

    # Total tokens an AI would consume by reading every indexed source file raw.
    total_raw_tokens = 0
    file_count = 0
    for rel_path in files:
        full_path = project_root / rel_path
        if full_path.exists():
            total_raw_tokens += estimate_file_tokens(full_path)
            file_count += 1

    # Size of the index itself, in tokens.
    index_tokens = estimate_tokens(index_text)

    # Tokens for the actual context payload returned for this query.
    result = get_context(index_path, query, num_results)
    context_text = json.dumps(result, indent=2)
    context_tokens = estimate_tokens(context_text)

    savings_vs_raw = total_raw_tokens - context_tokens
    # Guard against empty projects to avoid division by zero.
    savings_pct = (savings_vs_raw / total_raw_tokens * 100) if total_raw_tokens > 0 else 0

    return {
        "raw_project_tokens": total_raw_tokens,
        "index_tokens": index_tokens,
        "context_tokens": context_tokens,
        "savings_tokens": savings_vs_raw,
        "savings_percent": round(savings_pct, 1),
        "file_count": file_count,
        "compression_ratio": round(total_raw_tokens / context_tokens, 1) if context_tokens > 0 else 0,
    }
73
+
74
+
75
def get_context(index_path: Path, query: str | None = None, num_results: int = 5) -> dict[str, Any]:
    """Get relevant context from the index for AI assistants.

    Args:
        index_path: Path to the index.json file
        query: Optional search query. Supports:
            - symbol name: "handleAuth"
            - file-scoped: "auth.ts:handleAuth"
            - fuzzy: "hndlAuth"
        num_results: Number of results to return

    Returns:
        Dictionary with relevant symbols and their relationships.
    """
    index = json.loads(index_path.read_text(encoding="utf-8"))

    files = index.get("files", {})
    call_graph = index.get("call_graph", {})
    file_deps = index.get("file_dependencies", {})
    # NOTE: "project_root" was previously read here as well but never used;
    # the dead local has been removed.

    if not query:
        return _get_all_symbols(files, call_graph, num_results)

    # Parse file-scoped query (e.g. "auth.ts:handleAuth").  A leading ":"
    # is treated as a plain symbol query, not a file filter.
    file_filter = None
    query_lower = query.lower()
    if ":" in query and not query.startswith(":"):
        file_part, sym_part = query.split(":", 1)
        file_filter = file_part.lower()
        query_lower = sym_part.lower() if sym_part else ""

    results = []

    for rel_path, file_data in files.items():
        # Apply file filter (substring match on the relative path).
        if file_filter and file_filter not in rel_path.lower():
            continue

        # Older index formats stored a bare symbol list instead of a dict.
        symbols = file_data.get("symbols", []) if isinstance(file_data, dict) else file_data
        imports = file_data.get("imports", []) if isinstance(file_data, dict) else []

        for sym in symbols:
            # File-scoped query with no symbol part: return every symbol
            # in the matching file.
            if file_filter and not query_lower:
                results.append(_build_symbol_result(sym, rel_path, call_graph))
                continue

            if _matches_query(sym, query_lower):
                results.append(_build_symbol_result(sym, rel_path, call_graph))

        # Imports whose module name matches the query are included too.
        if query_lower:
            for imp in imports:
                if query_lower in imp.get("module", "").lower():
                    results.append({
                        "name": imp.get("module"),
                        "type": "import",
                        "file": rel_path,
                        "imported": imp.get("imported", []),
                    })

    results = _rank_results(results, call_graph, query_lower)

    response = {
        "query": query,
        "symbols": results[:num_results],
        "total_found": len(results),
    }

    # Add file dependency info for the first file matching the filter.
    if file_filter:
        for rel_path in files:
            if file_filter in rel_path.lower():
                deps = file_deps.get(rel_path, [])
                if deps:
                    response["file_dependencies"] = deps
                break

    return response
156
+
157
+
158
+ def _build_symbol_result(sym: dict, rel_path: str, call_graph: dict) -> dict:
159
+ """Build a context result dict for a symbol."""
160
+ result = {
161
+ "name": sym.get("name"),
162
+ "type": sym.get("type"),
163
+ "file": rel_path,
164
+ "line": sym.get("line"),
165
+ "params": sym.get("params", []),
166
+ "calls": sym.get("calls", []),
167
+ "class": sym.get("class"),
168
+ "framework": sym.get("framework"),
169
+ }
170
+
171
+ if sym.get("return_type"):
172
+ result["return_type"] = sym["return_type"]
173
+
174
+ if sym.get("methods"):
175
+ result["methods"] = [m.get("name") for m in sym["methods"]]
176
+
177
+ callers = [name for name, calls in call_graph.items() if sym.get("name") in calls]
178
+ if callers:
179
+ result["called_by"] = callers[:5]
180
+
181
+ return result
182
+
183
+
184
+ def _get_all_symbols(files: dict, call_graph: dict, limit: int) -> dict[str, Any]:
185
+ """Get all symbols, limited."""
186
+ all_symbols = []
187
+
188
+ for rel_path, file_data in files.items():
189
+ symbols = file_data.get("symbols", []) if isinstance(file_data, dict) else file_data
190
+ for sym in symbols:
191
+ all_symbols.append({
192
+ "name": sym.get("name"),
193
+ "type": sym.get("type"),
194
+ "file": rel_path,
195
+ "line": sym.get("line"),
196
+ "params": sym.get("params", []),
197
+ "calls": sym.get("calls", []),
198
+ })
199
+
200
+ return {
201
+ "symbols": all_symbols[:limit],
202
+ "total_found": len(all_symbols),
203
+ }
204
+
205
+
206
+ def _matches_query(symbol: dict, query: str) -> bool:
207
+ """Check if symbol matches the query (supports fuzzy and file-scoped)."""
208
+ name = symbol.get("name", "").lower()
209
+
210
+ # Exact or substring match on name
211
+ if query in name:
212
+ return True
213
+
214
+ # Fuzzy: all query chars appear in order in name
215
+ if len(query) >= 3 and _fuzzy_match(query, name):
216
+ return True
217
+
218
+ # Match on calls
219
+ calls = symbol.get("calls", [])
220
+ for call in calls:
221
+ if query in call.lower():
222
+ return True
223
+
224
+ # Match on class
225
+ symbol_class = symbol.get("class")
226
+ if symbol_class and query in symbol_class.lower():
227
+ return True
228
+
229
+ # Match on params
230
+ params = symbol.get("params", [])
231
+ for param in params:
232
+ if query in param.lower():
233
+ return True
234
+
235
+ return False
236
+
237
+
238
+ def _fuzzy_match(query: str, target: str) -> bool:
239
+ """Check if all characters in query appear in order in target."""
240
+ qi = 0
241
+ for ch in target:
242
+ if qi < len(query) and ch == query[qi]:
243
+ qi += 1
244
+ return qi == len(query)
245
+
246
+
247
+ def _rank_results(results: list[dict], call_graph: dict, query: str) -> list[dict]:
248
+ """Rank results by relevance."""
249
+ def relevance_score(result: dict) -> int:
250
+ score = 0
251
+ name = result.get("name", "").lower()
252
+
253
+ if name == query:
254
+ score += 100
255
+ elif name.startswith(query):
256
+ score += 50
257
+ elif query in name:
258
+ score += 10
259
+
260
+ # Boost symbols that have callers (more connected = more important)
261
+ if result.get("called_by"):
262
+ score += len(result["called_by"]) * 5
263
+
264
+ # Boost symbols that make calls (entry points)
265
+ if result.get("calls"):
266
+ score += min(len(result["calls"]), 5) * 2
267
+
268
+ # Boost functions/classes over imports
269
+ sym_type = result.get("type", "")
270
+ if sym_type == "class":
271
+ score += 15
272
+ elif sym_type in ("function", "method"):
273
+ score += 10
274
+ elif sym_type == "interface":
275
+ score += 5
276
+
277
+ return score
278
+
279
+ return sorted(results, key=relevance_score, reverse=True)
280
+
281
+
282
def format_context_for_ai(result: dict) -> str:
    """Format context as a text block suitable for pasting into AI chat."""
    out = ["## Relevant Code Context\n"]

    for sym in result.get("symbols", []):
        out.append(f"### {sym['name']} ({sym.get('type', 'unknown')})")
        out.append(f"**File:** `{sym.get('file', 'unknown')}:{sym.get('line', '?')}`")

        # Optional sections are emitted only when present and non-empty.
        params = sym.get("params")
        if params:
            out.append(f"**Params:** {', '.join(params)}")

        calls = sym.get("calls")
        if calls:
            out.append(f"**Calls:** {', '.join(calls)}")

        out.append("")

    return "\n".join(out)
@@ -0,0 +1,152 @@
1
+ """Live dashboard server with auto-refresh on file changes."""
2
+
3
+ import json
4
+ import http.server
5
+ import threading
6
+ import hashlib
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
class DashboardServer:
    """HTTP server that serves the HTML report and auto-regenerates on index changes.

    A background thread polls the index file's hash every ``_poll_interval``
    seconds; when it changes, the docs are regenerated and the injected
    browser script (polling ``/__cortexcode_hash``) reloads the page.
    """

    def __init__(self, project_path: Path, port: int = 8787):
        self.project_path = project_path.resolve()
        self.port = port
        self.docs_dir = self.project_path / ".cortexcode" / "docs"
        self.index_path = self.project_path / ".cortexcode" / "index.json"
        self._last_hash: str = ""
        self._running = False
        self._server: http.server.HTTPServer | None = None
        self._poll_interval = 2.0  # seconds between index-change checks

    def _get_index_hash(self) -> str:
        """Return a hash of the index file, or "" if it cannot be read.

        MD5 is used purely as a fast change fingerprint, not for security.
        """
        try:
            return hashlib.md5(self.index_path.read_bytes()).hexdigest()
        except OSError:
            return ""

    def _regenerate_docs(self) -> bool:
        """Regenerate HTML docs from the current index; return success."""
        try:
            from cortexcode.docs import generate_all_docs
            generate_all_docs(self.index_path, self.docs_dir)
            return True
        except Exception as e:
            # Best-effort: a failed regeneration keeps serving the old docs.
            print(f"Failed to regenerate docs: {e}")
            return False

    def _ensure_docs(self) -> bool:
        """Ensure docs exist, generating them if needed; return success."""
        html_file = self.docs_dir / "index.html"
        if not html_file.exists():
            return self._regenerate_docs()
        return True

    def _inject_auto_refresh(self) -> None:
        """Inject the auto-refresh polling script into the HTML report."""
        html_file = self.docs_dir / "index.html"
        if not html_file.exists():
            return

        content = html_file.read_text(encoding="utf-8")

        # Idempotent: the script id marks an already-injected page.
        if "cortexcode-auto-refresh" in content:
            return

        refresh_script = f"""
<script id="cortexcode-auto-refresh">
(function() {{
    let lastHash = '';
    async function checkForUpdates() {{
        try {{
            const resp = await fetch('/__cortexcode_hash');
            const hash = await resp.text();
            if (lastHash && hash !== lastHash) {{
                console.log('Index changed, reloading...');
                location.reload();
            }}
            lastHash = hash;
        }} catch(e) {{}}
    }}
    setInterval(checkForUpdates, {int(self._poll_interval * 1000)});
    checkForUpdates();
}})();
</script>
"""
        content = content.replace("</body>", f"{refresh_script}</body>")
        html_file.write_text(content, encoding="utf-8")

    def _poll_for_changes(self) -> None:
        """Background thread: regenerate docs whenever the index hash changes."""
        while self._running:
            time.sleep(self._poll_interval)
            current_hash = self._get_index_hash()
            if current_hash and current_hash != self._last_hash:
                self._last_hash = current_hash
                self._regenerate_docs()
                # Regeneration rewrites index.html, so re-inject the script.
                self._inject_auto_refresh()

    def start(self, open_browser: bool = True) -> None:
        """Start the dashboard server; blocks until interrupted.

        Args:
            open_browser: Open the dashboard in the default browser shortly
                after startup.

        Raises:
            FileNotFoundError: If no index has been built yet.
        """
        if not self.index_path.exists():
            raise FileNotFoundError(f"No index found at {self.index_path}. Run `cortexcode index` first.")

        self._ensure_docs()
        self._last_hash = self._get_index_hash()
        self._inject_auto_refresh()

        docs_dir = str(self.docs_dir)
        # Capture self for the handler closure below (the handler's own
        # `self` is the request handler instance).
        server_self = self

        class Handler(http.server.SimpleHTTPRequestHandler):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, directory=docs_dir, **kwargs)

            def do_GET(self):
                # Lightweight endpoint polled by the injected browser script.
                if self.path == "/__cortexcode_hash":
                    h = server_self._get_index_hash()
                    self.send_response(200)
                    self.send_header("Content-Type", "text/plain")
                    self.send_header("Access-Control-Allow-Origin", "*")
                    self.end_headers()
                    self.wfile.write(h.encode())
                    return
                return super().do_GET()

            def log_message(self, format, *args):
                pass  # Suppress per-request logging

        # NOTE(review): binds on all interfaces, so the dashboard is reachable
        # from other hosts — confirm this exposure is intended.
        self._server = http.server.HTTPServer(("0.0.0.0", self.port), Handler)
        self._running = True

        # Start the index-polling thread (daemon: dies with the process).
        poll_thread = threading.Thread(target=self._poll_for_changes, daemon=True)
        poll_thread.start()

        # Open the browser slightly after the server starts accepting.
        if open_browser:
            import webbrowser
            threading.Timer(0.5, lambda: webbrowser.open(f"http://localhost:{self.port}")).start()

        try:
            self._server.serve_forever()
        except KeyboardInterrupt:
            pass
        finally:
            self._running = False
            if self._server:
                self._server.shutdown()
                # Release the listening socket; shutdown() alone only stops
                # the serve loop and leaves the socket open.
                self._server.server_close()

    def stop(self) -> None:
        """Stop the dashboard server.

        Must be called from a thread other than the one running
        ``serve_forever()``, otherwise ``shutdown()`` deadlocks.
        """
        self._running = False
        if self._server:
            self._server.shutdown()
+ self._server.shutdown()