codedocent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codedocent/__init__.py +1 -0
- codedocent/__main__.py +4 -0
- codedocent/analyzer.py +620 -0
- codedocent/cli.py +132 -0
- codedocent/editor.py +85 -0
- codedocent/parser.py +369 -0
- codedocent/renderer.py +79 -0
- codedocent/scanner.py +135 -0
- codedocent/server.py +304 -0
- codedocent/templates/base.html +538 -0
- codedocent/templates/interactive.html +1032 -0
- codedocent-0.1.0.dist-info/METADATA +16 -0
- codedocent-0.1.0.dist-info/RECORD +17 -0
- codedocent-0.1.0.dist-info/WHEEL +5 -0
- codedocent-0.1.0.dist-info/entry_points.txt +2 -0
- codedocent-0.1.0.dist-info/licenses/LICENSE +21 -0
- codedocent-0.1.0.dist-info/top_level.txt +1 -0
codedocent/scanner.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Scan a directory tree and identify source files by language."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import pathspec
|
|
10
|
+
|
|
11
|
+
# Maps a lower-cased file extension (including the leading dot) to the
# language name recorded on each ScannedFile.  Files whose extension is not
# listed here are ignored by the scanner.
EXTENSION_MAP: dict[str, str] = {
    # Scripting / web languages (note: ".jsx" is mapped to "tsx")
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".jsx": "tsx",
    ".tsx": "tsx",
    # C family (note: ".h" headers are mapped to "c")
    ".c": "c",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".cxx": "cpp",
    ".h": "c",
    ".hpp": "cpp",
    ".hxx": "cpp",
    # Other compiled / general-purpose languages
    ".rs": "rust",
    ".go": "go",
    ".java": "java",
    ".rb": "ruby",
    # Markup, styling and data formats
    ".html": "html",
    ".htm": "html",
    ".css": "css",
    ".json": "json",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".toml": "toml",
    ".md": "markdown",
    # Shell and query languages
    ".sh": "bash",
    ".bash": "bash",
    ".sql": "sql",
}
|
|
40
|
+
|
|
41
|
+
# Directory names that are never descended into while scanning.
SKIP_DIRS: set[str] = {
    # Version control and dependency trees
    ".git",
    "node_modules",
    # Python virtual environments and bytecode
    "__pycache__",
    ".venv",
    "venv",
    ".env",
    # Build and packaging output
    "dist",
    "build",
    ".egg-info",
    # Tool caches
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class ScannedFile:
    """One recognized source file found by a directory scan.

    Attributes:
        filepath: Path of the file; ``scan_directory`` stores it relative
            to the scanned root.
        language: Language name looked up in ``EXTENSION_MAP``.
        extension: Lower-cased file extension, including the leading dot.
    """

    filepath: str
    language: str
    extension: str
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _is_binary(filepath: str, sample_size: int = 8192) -> bool:
    """Report whether *filepath* looks like a binary file.

    Only the first *sample_size* bytes are inspected; the file counts as
    binary when that sample contains a NUL byte.  A file that cannot be
    opened or read is also reported as binary, so callers skip it.
    """
    try:
        with open(filepath, "rb") as handle:
            head = handle.read(sample_size)
    except OSError:
        # Unreadable files are treated like binaries.
        return True
    return head.find(b"\x00") != -1
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _load_gitignore(root: str) -> pathspec.PathSpec | None:
    """Build a path matcher from ``<root>/.gitignore``.

    Returns ``None`` when *root* has no ``.gitignore`` file; otherwise the
    file's lines are compiled into a ``pathspec.PathSpec``.
    """
    candidate = os.path.join(root, ".gitignore")
    if os.path.isfile(candidate):
        with open(candidate, encoding="utf-8") as handle:
            return pathspec.PathSpec.from_lines("gitignore", handle)
    return None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _should_skip_dir(dirname: str) -> bool:
    """Tell whether a directory named *dirname* must not be scanned.

    A directory is skipped when its name is listed in ``SKIP_DIRS`` or
    ends with ``.egg-info``.
    """
    return dirname in SKIP_DIRS or dirname.endswith(".egg-info")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def scan_directory(path: str | Path) -> list[ScannedFile]:
    """Collect every recognized source file below *path*.

    Directories that match the skip list or start with a dot are pruned
    from the walk.  A file is kept only when it is not matched by the
    root ``.gitignore``, its extension appears in ``EXTENSION_MAP``, and
    it does not look binary.  Each result stores the path relative to the
    resolved root, and the list is ordered by that path so output is
    deterministic.
    """
    base = str(Path(path).resolve())
    ignore_spec = _load_gitignore(base)
    found: list[ScannedFile] = []

    for current_dir, subdirs, names in os.walk(base):
        # Assigning through the slice prunes the walk in place; sorting
        # fixes the order in which sub-directories are visited.
        subdirs[:] = sorted(
            name for name in subdirs
            if not (_should_skip_dir(name) or name.startswith("."))
        )

        for name in names:
            full_path = os.path.join(current_dir, name)
            relative = os.path.relpath(full_path, base)

            # Skip gitignore'd files
            if ignore_spec and ignore_spec.match_file(relative):
                continue

            suffix = os.path.splitext(name)[1].lower()
            lang = EXTENSION_MAP.get(suffix)
            if lang is None:
                continue

            # The binary check reads the file, so it runs last.
            if _is_binary(full_path):
                continue

            found.append(
                ScannedFile(filepath=relative, language=lang, extension=suffix)
            )

    return sorted(found, key=lambda item: item.filepath)
|
codedocent/server.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Localhost server for lazy (on-demand) AI analysis mode."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import signal
|
|
7
|
+
import socketserver
|
|
8
|
+
import threading
|
|
9
|
+
import time
|
|
10
|
+
import webbrowser
|
|
11
|
+
from http.server import BaseHTTPRequestHandler
|
|
12
|
+
|
|
13
|
+
from codedocent.parser import CodeNode
|
|
14
|
+
from codedocent.renderer import LANGUAGE_COLORS, DEFAULT_COLOR, NODE_ICONS
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Seconds without any request after which the server shuts itself down.
IDLE_TIMEOUT = 5 * 60
# Seconds between two checks of the idle timer.
IDLE_CHECK_INTERVAL = 30
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _node_to_dict(node: CodeNode, include_source: bool = False) -> dict:
    """Convert *node* and all of its descendants into a plain dict.

    The result holds the node's descriptive attributes, a display
    ``color`` chosen from the node's language, an ``icon`` chosen from its
    node type, and a ``children`` list built recursively.  The ``source``
    text is added (for the node and every descendant) only when
    *include_source* is true.
    """
    copied_fields = (
        "name",
        "node_type",
        "language",
        "filepath",
        "start_line",
        "end_line",
        "line_count",
        "node_id",
        "imports",
        "summary",
        "pseudocode",
        "quality",
        "warnings",
    )
    payload: dict = {field: getattr(node, field) for field in copied_fields}

    # Nodes without a language always get the default colour.
    if node.language:
        payload["color"] = LANGUAGE_COLORS.get(node.language, DEFAULT_COLOR)
    else:
        payload["color"] = DEFAULT_COLOR
    payload["icon"] = NODE_ICONS.get(node.node_type, "")
    payload["children"] = [
        _node_to_dict(child, include_source=include_source)
        for child in node.children
    ]

    if include_source:
        payload["source"] = node.source
    return payload
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _find_open_port(start: int = 8420) -> int:
    """Find an available port starting from *start*.

    Tries to bind ``127.0.0.1`` on each of the 100 consecutive port
    numbers beginning at *start* and returns the first one that binds.

    Raises:
        RuntimeError: If none of the candidate ports could be bound,
            including when the candidates fall outside the valid port
            range.
    """
    import socket  # pylint: disable=import-outside-toplevel

    for port in range(start, start + 100):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
            except (OSError, OverflowError):
                # OSError: the port is in use or not permitted.
                # OverflowError: the number is outside 0-65535; treat it
                # as "unavailable" so the caller gets RuntimeError below.
                continue
            return port
    raise RuntimeError("Could not find an open port")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def start_server(  # pylint: disable=too-many-locals,too-many-statements
    root: CodeNode,
    node_lookup: dict[str, CodeNode],
    model: str,
    port: int | None = None,
    open_browser: bool = True,
) -> None:
    """Start the interactive server.

    Serves the pre-rendered interactive page on ``127.0.0.1`` together
    with a small JSON API:

    * ``GET /`` - the HTML page.
    * ``GET /api/tree`` - the node tree without source text.
    * ``POST /api/analyze/<node_id>`` - analyze a node on demand and
      return it (with source).
    * ``POST /api/replace/<node_id>`` - replace a block's source on disk;
      the JSON body must be an object with a string ``source``.
    * ``POST /shutdown`` - stop the server.

    Blocks until shutdown is triggered (via POST /shutdown, idle timeout,
    or Ctrl-C).

    Args:
        root: Root node of the tree to serve; its ``filepath`` (or ``"."``)
            is used as the base directory for relative paths and the cache.
        node_lookup: Mapping of node id to node, used to resolve the ids
            in request paths.
        model: Model name passed through to the analyzer.
        port: Port to listen on; an open port is searched for when omitted.
        open_browser: Whether to open the server URL in a web browser.
    """
    if port is None:
        port = _find_open_port()

    # Shared state (closed over by the handler and the idle watcher).
    analyze_lock = threading.Lock()
    # Monotonic clock: the idle check must not be affected by wall-clock
    # adjustments.
    last_request_time = [time.monotonic()]
    cache_dir = root.filepath or "."
    stop_event = threading.Event()

    # Pre-render HTML once
    from codedocent.renderer import render_interactive  # pylint: disable=import-outside-toplevel  # noqa: E501

    html_content = render_interactive(root)

    # NOTE(review): requests are not checked for Origin/Host, so any local
    # page that can reach this port can call the write endpoint - confirm
    # whether that is acceptable.
    class Handler(BaseHTTPRequestHandler):
        """HTTP request handler for codedocent server."""

        def log_message(self, format, *args):  # pylint: disable=redefined-builtin  # noqa: A002,E501
            pass  # silence default logging

        def _touch(self):
            """Record that a request just arrived (resets the idle timer)."""
            last_request_time[0] = time.monotonic()

        def do_GET(self):  # pylint: disable=invalid-name
            """Handle GET requests."""
            self._touch()
            if self.path == "/":
                self._serve_html()
            elif self.path == "/api/tree":
                self._serve_tree()
            else:
                self.send_error(404)

        def do_POST(self):  # pylint: disable=invalid-name
            """Handle POST requests."""
            self._touch()
            if self.path == "/shutdown":
                self._handle_shutdown()
            elif self.path.startswith("/api/analyze/"):
                node_id = self.path[len("/api/analyze/"):]
                self._handle_analyze(node_id)
            elif self.path.startswith("/api/replace/"):
                node_id = self.path[len("/api/replace/"):]
                self._handle_replace(node_id)
            else:
                self.send_error(404)

        def _send_bytes(self, status_code: int, content_type: str,
                        data: bytes):
            """Send a complete response with the given body."""
            self.send_response(status_code)
            self.send_header("Content-Type", content_type)
            self.send_header("Content-Length", str(len(data)))
            self.end_headers()
            self.wfile.write(data)

        def _send_json(self, status_code: int, obj: dict):
            """Send *obj* serialized as a JSON response."""
            self._send_bytes(
                status_code,
                "application/json",
                json.dumps(obj).encode("utf-8"),
            )

        def _serve_html(self):
            self._send_bytes(
                200,
                "text/html; charset=utf-8",
                html_content.encode("utf-8"),
            )

        def _serve_tree(self):
            self._send_json(200, _node_to_dict(root))

        def _read_json_object(self):
            """Return the request body as a dict, or None if unusable.

            None is returned for a missing, non-numeric or negative
            Content-Length, for a body that is not valid JSON, and for
            JSON that is not an object.
            """
            try:
                length = int(self.headers.get("Content-Length", ""))
            except ValueError:
                return None
            if length < 0:
                return None
            try:
                # JSON and text-decoding errors are ValueError subclasses.
                body = json.loads(self.rfile.read(length))
            except ValueError:
                return None
            return body if isinstance(body, dict) else None

        def _handle_analyze(self, node_id: str):
            node = node_lookup.get(node_id)
            if node is None:
                self.send_error(404, "Unknown node ID")
                return

            # Already-analyzed nodes are returned without taking the lock.
            if node.summary is None:
                with analyze_lock:
                    # Re-check: another request may have analyzed this
                    # node while we were waiting for the lock.
                    if node.summary is None:
                        from codedocent.analyzer import analyze_single_node  # pylint: disable=import-outside-toplevel  # noqa: E501

                        analyze_single_node(node, model, cache_dir)

            self._send_json(200, _node_to_dict(node, include_source=True))

        def _handle_replace(self, node_id: str):
            node = node_lookup.get(node_id)
            if node is None:
                self.send_error(404, "Unknown node ID")
                return

            if node.node_type in ("directory", "file"):
                self._send_json(
                    400,
                    {"success": False,
                     "error": "Cannot replace directory/file blocks"},
                )
                return

            body = self._read_json_object()
            if body is None:
                # Answer with 400 instead of letting the handler crash
                # and drop the connection without a response.
                self._send_json(
                    400,
                    {"success": False,
                     "error": "Request body must be a JSON object"},
                )
                return

            new_source = body.get("source", "")
            if not isinstance(new_source, str):
                self._send_json(
                    400,
                    {"success": False, "error": "source must be a string"},
                )
                return

            # Resolve filepath
            import os as _os  # pylint: disable=import-outside-toplevel

            filepath = node.filepath or ""
            if _os.path.isabs(filepath):
                abs_path = filepath
            else:
                abs_path = _os.path.join(cache_dir, filepath)

            from codedocent.editor import replace_block_source  # pylint: disable=import-outside-toplevel  # noqa: E501
            from codedocent.analyzer import (  # pylint: disable=import-outside-toplevel  # noqa: E501
                _cache_key, _load_cache, _save_cache, CACHE_FILENAME,
            )

            with analyze_lock:
                # Capture the key and line range BEFORE the node is
                # mutated.  If the key depends on the node's source
                # (presumably - verify in analyzer), computing it after
                # the update would not address the old entry.
                old_key = _cache_key(node)
                old_end_line = node.end_line

                result = replace_block_source(
                    abs_path, node.start_line, node.end_line, new_source,
                )

                if result["success"]:
                    # Update in-memory node
                    new_line_count = result["lines_after"]
                    node.source = new_source
                    node.line_count = new_line_count
                    node.end_line = node.start_line + new_line_count - 1

                    # Clear cached analysis
                    node.summary = None
                    node.pseudocode = None
                    node.quality = None
                    node.warnings = None

                    # Keep the other nodes of this file aligned with the
                    # file on disk, so a later replace hits the right
                    # lines.
                    _shift_nodes_after_edit(node_lookup, node, old_end_line)

                    # Invalidate AI cache entry
                    cache_path = _os.path.join(cache_dir, CACHE_FILENAME)
                    cache = _load_cache(cache_path)
                    cache.get("entries", {}).pop(old_key, None)
                    _save_cache(cache_path, cache)

            self._send_json(200, result)

        def _handle_shutdown(self):
            self._send_bytes(200, "text/plain", b"OK")
            # shutdown() waits for serve_forever() to return, so it must
            # not run on the thread that is serving this request.
            threading.Thread(target=server.shutdown, daemon=True).start()

    server = socketserver.ThreadingTCPServer(("127.0.0.1", port), Handler)
    server.daemon_threads = True

    # Idle timeout watcher
    def _idle_watcher():
        # wait() doubles as the sleep and returns True once the server
        # has stopped, which ends the watcher instead of leaving it
        # running after start_server() has returned.
        while not stop_event.wait(IDLE_CHECK_INTERVAL):
            elapsed = time.monotonic() - last_request_time[0]
            if elapsed >= IDLE_TIMEOUT:
                print("\nIdle timeout reached, shutting down.", flush=True)
                server.shutdown()
                return

    watcher = threading.Thread(target=_idle_watcher, daemon=True)
    watcher.start()

    # Signal handler for clean Ctrl-C (only works in main thread)
    original_sigint = None

    try:
        if threading.current_thread() is threading.main_thread():
            original_sigint = signal.getsignal(signal.SIGINT)

            def _sigint_handler(_signum, _frame):
                print("\nShutting down...", flush=True)
                threading.Thread(target=server.shutdown, daemon=True).start()

            signal.signal(signal.SIGINT, _sigint_handler)

        url = f"http://127.0.0.1:{port}"
        print(f"codedocent server running at {url}", flush=True)
        print("Press Ctrl-C to stop.", flush=True)

        if open_browser:
            webbrowser.open(url)

        server.serve_forever()
    finally:
        # Runs even when start-up fails part-way, so the previous SIGINT
        # handler is always restored and the socket is always closed.
        stop_event.set()
        if original_sigint is not None:
            signal.signal(signal.SIGINT, original_sigint)
        server.server_close()
        print("Server stopped.", flush=True)


def _shift_nodes_after_edit(
    node_lookup: dict[str, CodeNode],
    edited: CodeNode,
    old_end_line: int,
) -> None:
    """Re-align line ranges of other nodes after *edited* changed length.

    *edited* must already carry its new ``end_line``; *old_end_line* is
    the value it had before the edit.  For every other node with the same
    ``filepath``:

    * nodes that start after the old end are moved by the line delta;
    * nodes whose range contained the edited block have their
      ``end_line`` (and ``line_count``, when it is an int) grown or shrunk
      by the delta.

    Nodes located inside the edited block are left alone; their positions
    cannot be derived from the line delta.  Nodes whose line numbers are
    not ints are skipped.
    """
    delta = edited.end_line - old_end_line
    if not delta:
        return

    for other in node_lookup.values():
        if other is edited or other.filepath != edited.filepath:
            continue
        start, end = other.start_line, other.end_line
        if not isinstance(start, int) or not isinstance(end, int):
            continue

        if start > old_end_line:
            # Entirely below the edited block: shift the whole range.
            other.start_line = start + delta
            other.end_line = end + delta
        elif start <= edited.start_line and end >= old_end_line:
            # Encloses the edited block: only its end moves.
            other.end_line = end + delta
            if isinstance(other.line_count, int):
                other.line_count += delta
|