codedocent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codedocent/scanner.py ADDED
@@ -0,0 +1,135 @@
1
+ """Scan a directory tree and identify source files by language."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ import pathspec
10
+
11
# Lookup table from file extension (leading dot included, lower case) to the
# language label recorded for a scanned file.  Extensions that are not listed
# here are not treated as source files by ``scan_directory``.
EXTENSION_MAP: dict[str, str] = {
    # Python / JavaScript / TypeScript
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    # NOTE(review): .jsx shares the "tsx" label with .tsx -- presumably
    # deliberate; confirm against the consumer of these labels.
    ".jsx": "tsx",
    ".tsx": "tsx",
    # C and C++ (a bare ".h" header is labelled as C)
    ".c": "c",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".cxx": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".hxx": "cpp",
    # Other compiled / general-purpose languages
    ".rs": "rust",
    ".go": "go",
    ".java": "java",
    ".rb": "ruby",
    # Markup and styling
    ".html": "html",
    ".htm": "html",
    ".css": "css",
    # Data and configuration formats
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".toml": "toml",
    ".md": "markdown",
    # Shell and SQL
    ".sh": "bash",
    ".bash": "bash",
    ".sql": "sql",
}
40
+
41
# Directory names that are matched exactly and never descended into.
SKIP_DIRS: set[str] = {
    # Version control and JavaScript dependencies
    ".git",
    "node_modules",
    # Python byte-code and virtual environments
    "__pycache__",
    ".venv",
    "venv",
    ".env",
    # Build and packaging output
    "dist",
    "build",
    ".egg-info",
    # Tool caches
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
}
55
+
56
+
57
@dataclass
class ScannedFile:
    """One recognized source file found while scanning a directory tree.

    Attributes:
        filepath: Path of the file; ``scan_directory`` stores it relative
            to the scanned root.
        language: Language label looked up in ``EXTENSION_MAP``.
        extension: File extension including the leading dot;
            ``scan_directory`` stores it lower-cased.
    """

    filepath: str
    language: str
    extension: str
64
+
65
+
66
def _is_binary(filepath: str, sample_size: int = 8192) -> bool:
    """Report whether *filepath* looks like a binary file.

    Only the first *sample_size* bytes are inspected; the file counts as
    binary when that prefix contains a NUL byte.  A file that cannot be
    opened or read (``OSError``) is reported as binary as well, so callers
    skip it instead of failing.
    """
    try:
        with open(filepath, "rb") as stream:
            head = stream.read(sample_size)
    except OSError:
        return True
    return head.find(b"\x00") != -1
74
+
75
+
76
def _load_gitignore(root: str) -> pathspec.PathSpec | None:
    """Build a matcher from the ``.gitignore`` located directly in *root*.

    Only ``<root>/.gitignore`` is consulted.  The file is read as UTF-8 and
    its lines are handed to ``pathspec`` using the ``"gitignore"`` pattern
    style.  Returns ``None`` when no such regular file exists.
    """
    candidate = os.path.join(root, ".gitignore")
    if os.path.isfile(candidate):
        with open(candidate, encoding="utf-8") as handle:
            return pathspec.PathSpec.from_lines("gitignore", handle)
    return None
83
+
84
+
85
def _should_skip_dir(dirname: str) -> bool:
    """Return True when *dirname* is a directory the scanner must not enter.

    A name is skipped when it is listed in ``SKIP_DIRS`` or when it ends
    with ``.egg-info`` (which covers names such as ``pkg.egg-info``).
    """
    return dirname in SKIP_DIRS or dirname.endswith(".egg-info")
92
+
93
+
94
def scan_directory(path: str | Path) -> list[ScannedFile]:
    """Collect every recognized source file below *path*.

    The tree is walked from the resolved absolute form of *path*.
    Directories whose name starts with a dot or that ``_should_skip_dir``
    rejects are pruned from the walk.  A file is dropped when it matches the
    root ``.gitignore``, when its lower-cased extension is not a key of
    ``EXTENSION_MAP``, or when ``_is_binary`` flags it.

    Returns:
        ``ScannedFile`` entries whose ``filepath`` is relative to the
        resolved root, sorted by that path for deterministic output.
    """
    base = str(Path(path).resolve())
    ignore_spec = _load_gitignore(base)
    found: list[ScannedFile] = []

    for current, subdirs, names in os.walk(base):
        # Assigning through the slice mutates the list os.walk handed us,
        # which is what prevents it from descending into dropped entries.
        kept = [
            sub for sub in subdirs
            if not (sub.startswith(".") or _should_skip_dir(sub))
        ]
        kept.sort()
        subdirs[:] = kept

        for name in names:
            full = os.path.join(current, name)
            relative = os.path.relpath(full, base)

            if ignore_spec and ignore_spec.match_file(relative):
                continue

            suffix = os.path.splitext(name)[1].lower()
            lang = EXTENSION_MAP.get(suffix)
            # The binary sniff opens the file, so it only runs for files
            # whose extension is recognized.
            if lang is None or _is_binary(full):
                continue

            found.append(
                ScannedFile(filepath=relative, language=lang, extension=suffix)
            )

    return sorted(found, key=lambda item: item.filepath)
codedocent/server.py ADDED
@@ -0,0 +1,304 @@
1
+ """Localhost server for lazy (on-demand) AI analysis mode."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import signal
7
+ import socketserver
8
+ import threading
9
+ import time
10
+ import webbrowser
11
+ from http.server import BaseHTTPRequestHandler
12
+
13
+ from codedocent.parser import CodeNode
14
+ from codedocent.renderer import LANGUAGE_COLORS, DEFAULT_COLOR, NODE_ICONS
15
+
16
+
17
# Seconds without any HTTP request after which the idle watcher stops the
# server (5 minutes).
IDLE_TIMEOUT = 5 * 60
# Seconds the idle watcher waits between two checks of the last-request time.
IDLE_CHECK_INTERVAL = 30
19
+
20
+
21
def _node_to_dict(node: CodeNode, include_source: bool = False) -> dict:
    """Convert *node* and all of its descendants into a plain dict for JSON.

    The node's descriptive attributes are copied under keys of the same
    name.  ``color`` comes from ``LANGUAGE_COLORS`` (``DEFAULT_COLOR`` when
    the node has no language or the language is not listed) and ``icon``
    from ``NODE_ICONS`` (empty string when the node type is not listed).
    ``children`` holds the same conversion applied to every child, with the
    same *include_source* setting.

    ``source`` is only added when *include_source* is true; it is left out
    by default because it is too large to ship with the initial page load.
    """
    plain_fields = (
        "name",
        "node_type",
        "language",
        "filepath",
        "start_line",
        "end_line",
        "line_count",
        "node_id",
        "imports",
        "summary",
        "pseudocode",
        "quality",
        "warnings",
    )
    payload: dict = {field: getattr(node, field) for field in plain_fields}

    if node.language:
        payload["color"] = LANGUAGE_COLORS.get(node.language, DEFAULT_COLOR)
    else:
        payload["color"] = DEFAULT_COLOR
    payload["icon"] = NODE_ICONS.get(node.node_type, "")

    nested = []
    for child in node.children:
        nested.append(_node_to_dict(child, include_source=include_source))
    payload["children"] = nested

    if include_source:
        payload["source"] = node.source
    return payload
54
+
55
+
56
def _find_open_port(start: int = 8420) -> int:
    """Return the first port in ``[start, start + 100)`` that can be bound.

    Each candidate is probed by binding a throw-away TCP socket to
    127.0.0.1; the probe socket is closed again before returning, so the
    caller still has to bind the port itself.

    Raises:
        RuntimeError: if none of the 100 candidate ports could be bound.
    """
    import socket  # pylint: disable=import-outside-toplevel

    candidate = start
    limit = start + 100
    while candidate < limit:
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind(("127.0.0.1", candidate))
        except OSError:
            # Port is taken (or otherwise unusable): try the next one.
            candidate += 1
        else:
            return candidate
        finally:
            probe.close()
    raise RuntimeError("Could not find an open port")
68
+
69
+
70
def start_server(  # pylint: disable=too-many-locals,too-many-statements
    root: CodeNode,
    node_lookup: dict[str, CodeNode],
    model: str,
    port: int | None = None,
    open_browser: bool = True,
) -> None:
    """Start the interactive localhost server and block until it stops.

    The server listens on 127.0.0.1 only and exposes:

    * ``GET /`` -- the HTML page, rendered once from *root* at start-up.
    * ``GET /api/tree`` -- the node tree as JSON (without source).
    * ``POST /api/analyze/<node_id>`` -- analyze one node on demand and
      return it as JSON (with source).
    * ``POST /api/replace/<node_id>`` -- replace the node's lines on disk.
      The body must be a JSON object; its ``"source"`` string is the new
      text (a missing key means an empty string).
    * ``POST /shutdown`` -- stop the server.

    Shutdown is triggered by ``POST /shutdown``, by ``IDLE_TIMEOUT`` seconds
    without any request, or by Ctrl-C (the SIGINT handler is only installed
    when running in the main thread and is restored on exit).

    Args:
        root: Root of the node tree to serve; ``root.filepath`` (or ``"."``
            when empty) is used as the base directory for relative node
            paths and for the analysis cache file.
        node_lookup: Mapping from node ID to node, used to resolve the
            ``<node_id>`` part of the API paths.
        model: Model name passed through to ``analyze_single_node``.
        port: Port to listen on; when ``None`` one is picked with
            ``_find_open_port``.
        open_browser: Open the server URL in the default web browser.
    """
    if port is None:
        port = _find_open_port()

    # State shared between the request threads and the idle watcher.
    analyze_lock = threading.Lock()
    last_request_time = [time.time()]  # one-element list: mutable from closures
    stop_event = threading.Event()  # set once the server has stopped
    cache_dir = root.filepath or "."

    # Pre-render HTML once
    from codedocent.renderer import render_interactive  # pylint: disable=import-outside-toplevel  # noqa: E501

    html_content = render_interactive(root)

    class Handler(BaseHTTPRequestHandler):
        """HTTP request handler for codedocent server."""

        def log_message(self, format, *args):  # pylint: disable=redefined-builtin  # noqa: A002,E501
            pass  # silence default logging

        def _touch(self):
            # Every request resets the idle timer.
            last_request_time[0] = time.time()

        def do_GET(self):  # pylint: disable=invalid-name
            """Handle GET requests."""
            self._touch()
            if self.path == "/":
                self._serve_html()
            elif self.path == "/api/tree":
                self._serve_tree()
            else:
                self.send_error(404)

        def do_POST(self):  # pylint: disable=invalid-name
            """Handle POST requests."""
            self._touch()
            if self.path == "/shutdown":
                self._handle_shutdown()
            elif self.path.startswith("/api/analyze/"):
                node_id = self.path[len("/api/analyze/"):]
                self._handle_analyze(node_id)
            elif self.path.startswith("/api/replace/"):
                node_id = self.path[len("/api/replace/"):]
                self._handle_replace(node_id)
            else:
                self.send_error(404)

        def _send_json(self, status_code: int, obj: dict):
            """Send *obj* as a JSON response with the given status code."""
            data = json.dumps(obj).encode("utf-8")
            self.send_response(status_code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)

        def _read_json_object(self):
            """Read the request body and decode it as a JSON object.

            Returns the decoded dict.  When the ``Content-Length`` header is
            missing or invalid, the body is not valid JSON, or the decoded
            value is not an object, a 400 JSON response is sent and ``None``
            is returned.
            """
            try:
                length = int(self.headers["Content-Length"])
            except (TypeError, ValueError):
                # TypeError: header missing (None); ValueError: not a number.
                length = -1
            if length < 0:
                # A negative length would make rfile.read() wait for EOF.
                self._send_json(
                    400,
                    {"success": False,
                     "error": "Missing or invalid Content-Length"},
                )
                return None

            try:
                body = json.loads(self.rfile.read(length))
            except ValueError:
                # Covers JSONDecodeError and undecodable bytes.
                self._send_json(
                    400,
                    {"success": False,
                     "error": "Request body must be valid JSON"},
                )
                return None

            if not isinstance(body, dict):
                self._send_json(
                    400,
                    {"success": False,
                     "error": "Request body must be a JSON object"},
                )
                return None
            return body

        def _serve_html(self):
            data = html_content.encode("utf-8")
            self.send_response(200)
            self.send_header("Content-Type", "text/html; charset=utf-8")
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)

        def _serve_tree(self):
            self._send_json(200, _node_to_dict(root))

        def _handle_analyze(self, node_id: str):
            if node_id not in node_lookup:
                self.send_error(404, "Unknown node ID")
                return

            node = node_lookup[node_id]

            # Only analyze nodes that have no summary yet.  The check is
            # repeated under the lock because another request thread may
            # have analyzed the node while this one was waiting.
            if node.summary is None:
                with analyze_lock:
                    if node.summary is None:
                        from codedocent.analyzer import analyze_single_node  # pylint: disable=import-outside-toplevel  # noqa: E501

                        analyze_single_node(node, model, cache_dir)

            self._send_json(200, _node_to_dict(node, include_source=True))

        def _handle_replace(self, node_id: str):
            if node_id not in node_lookup:
                self.send_error(404, "Unknown node ID")
                return

            node = node_lookup[node_id]

            if node.node_type in ("directory", "file"):
                self._send_json(
                    400,
                    {"success": False,
                     "error": "Cannot replace directory/file blocks"},
                )
                return

            body = self._read_json_object()
            if body is None:
                return  # a 400 response has already been sent
            new_source = body.get("source", "")

            if not isinstance(new_source, str):
                self._send_json(
                    400,
                    {"success": False, "error": "source must be a string"},
                )
                return

            # Resolve filepath
            import os as _os  # pylint: disable=import-outside-toplevel

            filepath = node.filepath or ""
            if _os.path.isabs(filepath):
                abs_path = filepath
            else:
                abs_path = _os.path.join(cache_dir, filepath)

            from codedocent.editor import replace_block_source  # pylint: disable=import-outside-toplevel  # noqa: E501
            from codedocent.analyzer import (  # pylint: disable=import-outside-toplevel  # noqa: E501
                _cache_key, _load_cache, _save_cache, CACHE_FILENAME,
            )

            with analyze_lock:
                result = replace_block_source(
                    abs_path, node.start_line, node.end_line, new_source,
                )

                if result["success"]:
                    # Take the cache key BEFORE the node is mutated: if the
                    # key depends on the node's source or line range, only
                    # the pre-edit key identifies the stale entry.
                    old_key = _cache_key(node)

                    # Update in-memory node
                    # NOTE(review): only this node is updated; other nodes
                    # from the same file keep their previous start_line /
                    # end_line even if the line count changed.
                    new_line_count = result["lines_after"]
                    node.source = new_source
                    node.line_count = new_line_count
                    node.end_line = node.start_line + new_line_count - 1

                    # Clear cached analysis
                    node.summary = None
                    node.pseudocode = None
                    node.quality = None
                    node.warnings = None

                    # Invalidate AI cache entry.  The key of the updated
                    # node is dropped too, which is what earlier versions
                    # removed.
                    cache_path = _os.path.join(cache_dir, CACHE_FILENAME)
                    cache = _load_cache(cache_path)
                    entries = cache.get("entries", {})
                    entries.pop(old_key, None)
                    entries.pop(_cache_key(node), None)
                    _save_cache(cache_path, cache)

            self._send_json(200, result)

        def _handle_shutdown(self):
            self.send_response(200)
            self.send_header("Content-Type", "text/plain")
            self.end_headers()
            self.wfile.write(b"OK")
            # shutdown() blocks until serve_forever() has returned, so it
            # must not run on a thread that is serving a request.
            threading.Thread(target=server.shutdown, daemon=True).start()

    server = socketserver.ThreadingTCPServer(("127.0.0.1", port), Handler)
    server.daemon_threads = True

    # Idle timeout watcher
    def _idle_watcher():
        # Event.wait() doubles as the sleep between checks and returns True
        # as soon as the server has stopped, which ends the watcher instead
        # of leaving it asleep (and later printing a bogus timeout message).
        while not stop_event.wait(IDLE_CHECK_INTERVAL):
            elapsed = time.time() - last_request_time[0]
            if elapsed >= IDLE_TIMEOUT:
                print("\nIdle timeout reached, shutting down.", flush=True)
                server.shutdown()
                return

    watcher = threading.Thread(target=_idle_watcher, daemon=True)
    watcher.start()

    # Signal handler for clean Ctrl-C (only works in main thread)
    original_sigint = None

    if threading.current_thread() is threading.main_thread():
        original_sigint = signal.getsignal(signal.SIGINT)

        def _sigint_handler(_signum, _frame):
            print("\nShutting down...", flush=True)
            # Run shutdown() on another thread: the handler executes on the
            # thread that is inside serve_forever().
            threading.Thread(target=server.shutdown, daemon=True).start()

        signal.signal(signal.SIGINT, _sigint_handler)

    url = f"http://127.0.0.1:{port}"
    print(f"codedocent server running at {url}", flush=True)
    print("Press Ctrl-C to stop.", flush=True)

    if open_browser:
        webbrowser.open(url)

    try:
        server.serve_forever()
    finally:
        stop_event.set()
        if original_sigint is not None:
            signal.signal(signal.SIGINT, original_sigint)
        server.server_close()
        print("Server stopped.", flush=True)