codedocent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codedocent/cli.py ADDED
@@ -0,0 +1,132 @@
1
+ """CLI for codedocent: scan, parse, and render code visualizations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+ from codedocent.parser import CodeNode, parse_directory
8
+ from codedocent.scanner import scan_directory
9
+
10
+
11
def print_tree(node: CodeNode, indent: int = 0) -> None:
    """Write an indented text outline of *node* and its descendants to stdout.

    One line is printed per node.  Directories show their name and line
    count; files additionally show their language and imports when those
    are set; every other node kind shows its line span.  Children are
    printed recursively, one indent level deeper than their parent.
    """
    pad = " " * indent
    kind = node.node_type
    tag = kind.upper()

    if kind == "directory":
        text = f"{tag}: {node.name}/ ({node.line_count} lines)"
    elif kind == "file":
        # Optional pieces collapse to empty lists so they vanish from the join.
        language = [f"[{node.language}]"] if node.language else []
        imported = (
            [f"imports: {', '.join(node.imports)}"] if node.imports else []
        )
        text = " ".join(
            [
                f"{tag}: {node.name}",
                *language,
                f"({node.line_count} lines)",
                *imported,
            ]
        )
    else:
        span = f"L{node.start_line}-{node.end_line}"
        text = f"{tag}: {node.name} ({span}, {node.line_count} lines)"

    print(f"{pad}{text}")

    for kid in node.children:
        print_tree(kid, indent + 1)
35
+
36
+
37
def main() -> None:
    """Run the codedocent command line.

    Parses the arguments, scans and parses the target directory, then
    dispatches on the first matching mode: ``--text`` prints a text tree,
    ``--no-ai`` and ``--full`` write static HTML to ``--output``, and with
    none of those flags the interactive server is started.
    """
    cli = argparse.ArgumentParser(
        prog="codedocent",
        description="Code visualization for non-programmers",
    )
    cli.add_argument("path", help="Path to the directory to scan")
    cli.add_argument(
        "--text",
        action="store_true",
        help="Print text tree instead of generating HTML",
    )
    cli.add_argument(
        "-o",
        "--output",
        default="codedocent_output.html",
        help="HTML output file path (default: codedocent_output.html)",
    )
    cli.add_argument(
        "--model",
        default="qwen3:14b",
        help="Ollama model for AI summaries (default: qwen3:14b)",
    )
    cli.add_argument(
        "--no-ai",
        action="store_true",
        help="Skip AI analysis, render with placeholders",
    )
    cli.add_argument(
        "--full",
        action="store_true",
        help="Analyze everything upfront (priority-batched), write static HTML",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port for the interactive server (default: auto-select from 8420)",
    )
    cli.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of parallel AI workers for --full mode (default: 1)",
    )
    opts = cli.parse_args()

    code_tree = parse_directory(scan_directory(opts.path), root=opts.path)

    # Heavy modules are imported lazily so each mode only loads what it uses.
    if opts.text:
        # Text mode: quality score only, print tree
        from codedocent.analyzer import analyze_no_ai  # pylint: disable=import-outside-toplevel  # noqa: E501

        analyze_no_ai(code_tree)
        print_tree(code_tree)
        return

    if opts.no_ai:
        # No-AI mode: quality score only, static HTML
        from codedocent.analyzer import analyze_no_ai  # pylint: disable=import-outside-toplevel  # noqa: E501
        from codedocent.renderer import render  # pylint: disable=import-outside-toplevel  # noqa: E501

        analyze_no_ai(code_tree)
        render(code_tree, opts.output)
        print(f"HTML output written to {opts.output}")
        return

    if opts.full:
        # Full mode: upfront AI analysis, static HTML
        from codedocent.analyzer import analyze  # pylint: disable=import-outside-toplevel  # noqa: E501
        from codedocent.renderer import render  # pylint: disable=import-outside-toplevel  # noqa: E501

        analyze(code_tree, model=opts.model, workers=opts.workers)
        render(code_tree, opts.output)
        print(f"HTML output written to {opts.output}")
        return

    # Default lazy mode: interactive server
    from codedocent.analyzer import analyze_no_ai, assign_node_ids  # pylint: disable=import-outside-toplevel  # noqa: E501
    from codedocent.server import start_server  # pylint: disable=import-outside-toplevel  # noqa: E501

    analyze_no_ai(code_tree)
    lookup = assign_node_ids(code_tree)
    start_server(
        code_tree,
        lookup,
        model=opts.model,
        port=opts.port,
    )
129
+
130
+
131
+ if __name__ == "__main__":
132
+ main()
codedocent/editor.py ADDED
@@ -0,0 +1,85 @@
1
+ """Code replacement: write modified source back into a file."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import shutil
7
+
8
+
9
def _line_ending_of(lines: list[str]) -> str:
    """Return the terminator of the first terminated line, or ``""``."""
    for text in lines:
        if text.endswith("\r\n"):
            return "\r\n"
        if text.endswith(("\n", "\r")):
            return text[-1]
    return ""


def replace_block_source(
    filepath: str,
    start_line: int,
    end_line: int,
    new_source: str,
) -> dict:
    """Replace lines *start_line* through *end_line* (1-indexed, inclusive).

    The file is read and written without newline translation, so lines
    outside the replaced range keep their original terminators.  The
    replacement text is normalised to the terminator used by the replaced
    lines (falling back to the file's first terminator, then ``"\\n"``).
    An empty *new_source* deletes the range.

    A ``.bak`` copy of the file is created next to it before writing.

    Returns:
        ``{"success": True, "lines_before": int, "lines_after": int}`` on
        success, or ``{"success": False, "error": str}`` when the inputs
        are invalid, the range exceeds the file, the file is not valid
        UTF-8, or an OS-level error occurs.
    """
    # --- input validation ---
    if not os.path.isfile(filepath):
        return {"success": False, "error": f"File not found: {filepath}"}

    if (
        not isinstance(start_line, int)
        or not isinstance(end_line, int)
        or start_line < 1
        or end_line < 1
        or start_line > end_line
    ):
        return {
            "success": False,
            "error": f"Invalid line range: {start_line}-{end_line}",
        }

    if not isinstance(new_source, str):
        return {"success": False, "error": "new_source must be a string"}

    try:
        # newline="" keeps "\r\n" / "\r" terminators intact instead of
        # translating them to "\n" on read.
        with open(filepath, encoding="utf-8", newline="") as f:
            lines = f.readlines()

        if end_line > len(lines):
            return {
                "success": False,
                "error": (
                    f"end_line {end_line} exceeds file length"
                    f" ({len(lines)} lines)"
                ),
            }

        old_count = end_line - start_line + 1

        # Build replacement lines
        if new_source == "":
            new_lines: list[str] = []
        else:
            eol = (
                _line_ending_of(lines[start_line - 1:end_line])
                or _line_ending_of(lines)
                or "\n"
            )
            text = new_source.replace("\r\n", "\n").replace("\r", "\n")
            pieces = text.split("\n")
            # A trailing newline yields a final '' element; drop it so no
            # extra blank line is appended.
            if text.endswith("\n"):
                pieces.pop()
            new_lines = [piece + eol for piece in pieces]

        new_count = len(new_lines)
        lines[start_line - 1:end_line] = new_lines

        # Encode up front: an encoding failure must not leave the target
        # truncated.
        payload = "".join(lines).encode("utf-8")

        # Backup
        shutil.copy2(filepath, filepath + ".bak")

        with open(filepath, "wb") as f:
            f.write(payload)

        return {
            "success": True,
            "lines_before": old_count,
            "lines_after": new_count,
        }

    except (OSError, UnicodeError) as exc:
        # UnicodeError covers files that are not valid UTF-8.
        return {"success": False, "error": str(exc)}
codedocent/parser.py ADDED
@@ -0,0 +1,369 @@
1
+ """Parse source files into a tree of CodeNodes using tree-sitter."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+
9
+ import tree_sitter_language_pack as tslp
10
+
11
+ from codedocent.scanner import ScannedFile
12
+
13
+
14
@dataclass
class CodeNode:  # pylint: disable=too-many-instance-attributes
    """One node of the parsed code tree (directory, file, or definition)."""

    # --- identity and location (required) ---
    name: str
    # One of: 'directory' | 'file' | 'class' | 'function' | 'method'
    node_type: str
    language: str | None
    filepath: str | None
    start_line: int  # 1-indexed
    end_line: int  # 1-indexed, inclusive
    source: str  # actual source code of this node

    # --- structure ---
    children: list[CodeNode] = field(default_factory=list)
    imports: list[str] = field(default_factory=list)
    line_count: int = 0

    # --- filled in by the analyzer later ---
    summary: str | None = None
    pseudocode: str | None = None
    quality: str | None = None  # 'clean' | 'complex' | 'warning'
    warnings: list[str] | None = None
    node_id: str | None = None
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Language-specific AST extraction rules
38
+ # ---------------------------------------------------------------------------
39
+
40
+ # Maps tree-sitter node types to our node_type values, and how to find the name
41
+ # key: (ts_node_type,) -> (our_node_type, name_child_type)
42
# Top-level Python definitions: tree-sitter type -> (node_type, name child).
_PYTHON_RULES: dict[str, tuple[str, str]] = dict(
    function_definition=("function", "identifier"),
    class_definition=("class", "identifier"),
)

# Top-level JavaScript / TypeScript / TSX definitions, same layout.
_JS_TS_RULES: dict[str, tuple[str, str]] = dict(
    function_declaration=("function", "identifier"),
    class_declaration=("class", "identifier"),
)

# Node types that contain the body / children of a class, per language.
_CLASS_BODY_TYPES: dict[str, str] = {
    "python": "block",
    **{
        lang: "class_body"
        for lang in ("javascript", "typescript", "tsx")
    },
}

# Method definition node types inside class bodies, per language, mapped to
# the child type that holds the method name.  Each language gets its own
# inner dict.
_METHOD_TYPES: dict[str, dict[str, str]] = {
    "python": {"function_definition": "identifier"},
    **{
        lang: {"method_definition": "property_identifier"}
        for lang in ("javascript", "typescript", "tsx")
    },
}
67
+
68
+
69
def _rules_for(language: str) -> dict[str, tuple[str, str]]:
    """Look up the top-level extraction rules for *language*.

    Returns the Python table for ``"python"``, the shared JS/TS table for
    ``"javascript"``, ``"typescript"`` and ``"tsx"``, and an empty dict for
    any other language.
    """
    if language in ("javascript", "typescript", "tsx"):
        return _JS_TS_RULES
    return _PYTHON_RULES if language == "python" else {}
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Import extraction
80
+ # ---------------------------------------------------------------------------
81
+
82
def _extract_imports_python(root_node) -> list[str]:
    """Extract imported module names from a Python AST.

    Only direct children of *root_node* are inspected.

    * ``import a.b`` and ``import a.b as c`` both record ``"a.b"``; the
      aliased form wraps its dotted name in an ``aliased_import`` node.
    * ``from pkg.mod import x`` records ``"pkg.mod"``; relative forms such
      as ``from . import x`` / ``from .mod import x`` record the relative
      module text (``"."`` / ``".mod"``), never the imported symbol.

    Returns the names in source order; duplicates are kept.
    """
    imports: list[str] = []
    for stmt in root_node.children:
        if stmt.type == "import_statement":
            for item in stmt.children:
                target = item
                if item.type == "aliased_import":
                    # `import x as y`: the module is the dotted_name inside.
                    target = next(
                        (c for c in item.children if c.type == "dotted_name"),
                        None,
                    )
                if target is not None and target.type == "dotted_name":
                    imports.append(target.text.decode())
        elif stmt.type == "import_from_statement":
            for item in stmt.children:
                # The module comes first (absolute or relative); stop there
                # so imported symbols are not mistaken for modules.
                if item.type in ("dotted_name", "relative_import"):
                    imports.append(item.text.decode())
                    break
    return imports
96
+
97
+
98
def _extract_imports_js(root_node) -> list[str]:
    """Collect the module specifiers of top-level JS/TS ``import`` statements.

    For every direct ``import_statement`` child of *root_node*, each
    ``string`` child is decoded and stripped of surrounding quote
    characters.  Results are returned in source order.
    """
    modules: list[str] = []
    for stmt in root_node.children:
        if stmt.type != "import_statement":
            continue
        modules.extend(
            part.text.decode().strip("'\"")  # strip quotes
            for part in stmt.children
            if part.type == "string"
        )
    return modules
109
+
110
+
111
def _extract_imports(root_node, language: str) -> list[str]:
    """Run the import extractor that matches *language*.

    Python uses the Python extractor; JavaScript, TypeScript and TSX share
    the JS extractor; any other language yields an empty list.
    """
    if language in ("javascript", "typescript", "tsx"):
        extractor = _extract_imports_js
    elif language == "python":
        extractor = _extract_imports_python
    else:
        return []
    return extractor(root_node)
118
+
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # Arrow-function extraction (JS/TS)
122
+ # ---------------------------------------------------------------------------
123
+
124
def _extract_arrow_functions(root_node, language: str) -> list[CodeNode]:
    """Collect top-level ``const name = () => ...`` style declarations.

    Only JavaScript, TypeScript and TSX are handled; other languages give
    an empty list.  Each ``variable_declarator`` inside a direct
    ``lexical_declaration`` child of *root_node* that has both an
    identifier and an arrow function becomes a ``"function"`` node.  The
    node's line span and source are those of the whole declaration
    statement; ``filepath`` is left as ``None`` for the caller to fill in.
    """
    if language not in ("javascript", "typescript", "tsx"):
        return []

    found: list[CodeNode] = []
    for stmt in root_node.children:
        if stmt.type != "lexical_declaration":
            continue
        declarators = [
            d for d in stmt.children if d.type == "variable_declarator"
        ]
        for declarator in declarators:
            pieces = declarator.children
            idents = [p for p in pieces if p.type == "identifier"]
            # When several identifiers are present the last one is used.
            ident = idents[-1] if idents else None
            is_arrow = any(p.type == "arrow_function" for p in pieces)
            if not (ident and is_arrow):
                continue
            first_row = stmt.start_point[0]
            last_row = stmt.end_point[0]
            found.append(CodeNode(
                name=ident.text.decode(),
                node_type="function",
                language=language,
                filepath=None,  # filled by caller
                start_line=first_row + 1,
                end_line=last_row + 1,
                source=stmt.text.decode(),
                line_count=last_row - first_row + 1,
            ))
    return found
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # Name extraction helper
158
+ # ---------------------------------------------------------------------------
159
+
160
def _find_child_text(node, child_type: str) -> str:
    """Return the decoded text of *node*'s first child of type *child_type*.

    Falls back to the literal ``"<anonymous>"`` when no direct child has
    that type.
    """
    match = next((c for c in node.children if c.type == child_type), None)
    if match is None:
        return "<anonymous>"
    return match.text.decode()
166
+
167
+
168
+ # ---------------------------------------------------------------------------
169
+ # Method extraction from class body
170
+ # ---------------------------------------------------------------------------
171
+
172
def _extract_methods(class_node, language: str) -> list[CodeNode]:
    """Extract method nodes from a class body.

    The class body is the first child of *class_node* whose type matches
    the body type configured for *language*.  Each direct body member
    whose type is a configured method type becomes a ``"method"`` node.
    Methods wrapped in a ``decorated_definition`` (e.g. ``@property`` or
    ``@staticmethod`` in Python) are included too; their line span and
    source cover the decorators as well.

    Returns an empty list when the language has no class-body or method
    configuration, or when no body is found.  ``filepath`` is left as
    ``None`` for the caller to fill in.
    """
    body_type = _CLASS_BODY_TYPES.get(language)
    method_map = _METHOD_TYPES.get(language, {})
    if not body_type or not method_map:
        return []

    body = next(
        (c for c in class_node.children if c.type == body_type), None
    )
    if body is None:
        return []

    methods: list[CodeNode] = []
    for member in body.children:
        definition = member
        if member.type == "decorated_definition":
            # Decorators wrap the real definition one level down.
            definition = next(
                (c for c in member.children if c.type in method_map), None
            )
            if definition is None:
                continue
        elif member.type not in method_map:
            continue

        methods.append(CodeNode(
            name=_find_child_text(definition, method_map[definition.type]),
            node_type="method",
            language=language,
            filepath=None,
            # Span of the outer member, so decorators stay with the method.
            start_line=member.start_point[0] + 1,
            end_line=member.end_point[0] + 1,
            source=member.text.decode() if member.text else "",
            line_count=member.end_point[0] - member.start_point[0] + 1,
        ))
    return methods
202
+
203
+
204
+ # ---------------------------------------------------------------------------
205
+ # Public API
206
+ # ---------------------------------------------------------------------------
207
+
208
# Wrapper nodes that hold a class/function definition one level down:
# decorated Python definitions and exported JS/TS declarations.
_DEFINITION_WRAPPERS = ("decorated_definition", "export_statement")


def _unwrap_definition(node, rules: dict[str, tuple[str, str]]):
    """Return the definition *node* is, or directly wraps, else ``None``."""
    if node.type in rules:
        return node
    if node.type in _DEFINITION_WRAPPERS:
        for inner in node.children:
            if inner.type in rules:
                return inner
    return None


def parse_file(  # pylint: disable=too-many-locals
    filepath: str, language: str, source: str | None = None,
) -> CodeNode:
    """Parse a single source file and return a file-level CodeNode.

    If *source* is provided it is used directly; otherwise the file is read
    from disk as UTF-8.

    For languages with extraction rules, the file node's children are the
    top-level classes and functions (classes carry their methods as
    children) plus top-level arrow functions for JS/TS, sorted by start
    line.  Definitions wrapped in decorators or in an ``export`` statement
    are included; their line span and source cover the wrapper.  For
    languages without rules, or when no parser is available, the bare file
    node is returned.
    """
    if source is None:
        with open(filepath, encoding="utf-8") as f:
            source = f.read()

    source_bytes = source.encode()
    line_count = len(source.splitlines())

    # Build the file-level node
    file_node = CodeNode(
        name=os.path.basename(filepath),
        node_type="file",
        language=language,
        filepath=filepath,
        start_line=1,
        end_line=line_count,
        source=source,
        line_count=line_count,
    )

    # Languages we can parse with tree-sitter
    parseable = _rules_for(language)
    if not parseable:
        return file_node

    try:
        parser = tslp.get_parser(language)  # type: ignore[arg-type]
    except (KeyError, ValueError):
        return file_node

    root = parser.parse(source_bytes).root_node

    # Extract imports
    file_node.imports = _extract_imports(root, language)

    # Walk top-level children for classes/functions
    for child in root.children:
        definition = _unwrap_definition(child, parseable)
        if definition is None:
            continue
        our_type, name_child = parseable[definition.type]
        node = CodeNode(
            name=_find_child_text(definition, name_child),
            node_type=our_type,
            language=language,
            filepath=filepath,
            # Use the outer node's span so decorators / `export` are kept.
            start_line=child.start_point[0] + 1,
            end_line=child.end_point[0] + 1,
            source=child.text.decode() if child.text else "",
            line_count=child.end_point[0] - child.start_point[0] + 1,
        )
        # If it's a class, extract methods as children
        if our_type == "class":
            node.children = _extract_methods(definition, language)
            for m in node.children:
                m.filepath = filepath
        file_node.children.append(node)

    # Arrow functions (JS/TS)
    arrows = _extract_arrow_functions(root, language)
    for a in arrows:
        a.filepath = filepath
    file_node.children.extend(arrows)

    # Sort children by start_line
    file_node.children.sort(key=lambda n: n.start_line)

    return file_node
285
+
286
+
287
def parse_directory(  # pylint: disable=too-many-locals
    scanned_files: list[ScannedFile],
    root: str | None = None,
) -> CodeNode:
    """Build a full tree with directory nodes from scanner output.

    *root* is the base directory path; when it is ``None`` the current
    directory (``"."``) is used.  Each scanned file's ``filepath`` is joined
    onto the resolved root to read and parse it, and the resulting file
    node stores that ``filepath`` value unchanged.

    Directory children are sorted with directories first, then by name.
    Each directory's ``line_count`` is the sum of its children's counts.
    """
    base = str(Path("." if root is None else root).resolve())

    def _new_dir(name: str, path: str) -> CodeNode:
        return CodeNode(
            name=name,
            node_type="directory",
            language=None,
            filepath=path,
            start_line=0,
            end_line=0,
            source="",
            line_count=0,
        )

    top = _new_dir(os.path.basename(base) or base, base)

    # Directory nodes keyed by their path relative to the root ("" = root).
    by_key: dict[str, CodeNode] = {"": top}

    for scanned in scanned_files:
        segments = Path(scanned.filepath).parts

        # Ensure every ancestor directory of the file has a node.
        for depth, segment in enumerate(segments[:-1]):
            key = os.path.join(*segments[: depth + 1])
            if key in by_key:
                continue
            owner = os.path.join(*segments[:depth]) if depth else ""
            created = _new_dir(segment, os.path.join(base, key))
            by_key[owner].children.append(created)
            by_key[key] = created

        # Parse the file, then store the relative path on the node.
        parsed = parse_file(
            os.path.join(base, scanned.filepath), scanned.language
        )
        parsed.filepath = scanned.filepath

        owner = os.path.join(*segments[:-1]) if len(segments) > 1 else ""
        by_key[owner].children.append(parsed)

    # Sort every directory's children: dirs first, then files, by name.
    pending = [top]
    while pending:
        current = pending.pop()
        current.children.sort(
            key=lambda n: (n.node_type != "directory", n.name)
        )
        pending.extend(
            c for c in current.children if c.node_type == "directory"
        )

    # Accumulate line counts up the tree
    def _roll_up(node: CodeNode) -> int:
        if node.node_type != "directory":
            return node.line_count
        node.line_count = sum(map(_roll_up, node.children))
        return node.line_count

    _roll_up(top)

    return top
codedocent/renderer.py ADDED
@@ -0,0 +1,79 @@
1
+ """Render a CodeNode tree as a self-contained HTML file."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+
9
+ from jinja2 import Environment, FileSystemLoader
10
+
11
+ from codedocent.parser import CodeNode
12
+
13
# Hex color per language name.
LANGUAGE_COLORS: dict[str, str] = dict(
    python="#3572A5",
    javascript="#F0DB4F",
    typescript="#F0DB4F",
    tsx="#F0DB4F",
    c="#2E8B57",
    cpp="#2E8B57",
    rust="#DEA584",
    go="#00ADD8",
    html="#E34C26",
    css="#563D7C",
    json="#999999",
    yaml="#999999",
    toml="#999999",
)

# Fallback color.
DEFAULT_COLOR = "#CCCCCC"

# Icon per node type.
NODE_ICONS: dict[str, str] = dict(
    (
        ("directory", "\U0001f4c1"),
        ("file", "\U0001f4c4"),
        ("class", "\U0001f537"),
        ("function", "\u26a1"),
        ("method", "\u26a1"),
    )
)
38
+
39
+
40
def _get_color(node: CodeNode) -> str:
    """Pick the display color for *node* from its language.

    Nodes without a language, and languages missing from the color table,
    get the default color.
    """
    lang = node.language
    return (
        DEFAULT_COLOR
        if lang is None
        else LANGUAGE_COLORS.get(lang, DEFAULT_COLOR)
    )
45
+
46
+
47
def render(root: CodeNode, output_path: str) -> None:
    """Write *root* as a self-contained HTML page to *output_path*.

    Renders ``base.html`` from the package's ``templates`` directory with
    autoescaping enabled, exposing ``get_color`` and ``NODE_ICONS`` as
    template globals.  The output file's parent directory is created if it
    is missing, and the page is written as UTF-8.
    """
    templates = Path(__file__).parent / "templates"
    env = Environment(
        loader=FileSystemLoader(str(templates)),
        autoescape=True,
    )
    env.globals.update(get_color=_get_color, NODE_ICONS=NODE_ICONS)

    page = env.get_template("base.html").render(root=root)

    target_dir = os.path.dirname(os.path.abspath(output_path))
    os.makedirs(target_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as out:
        out.write(page)
63
+
64
+
65
def render_interactive(root: CodeNode) -> str:
    """Render *root* as interactive HTML string (served by localhost server).

    Embeds the tree as JSON for client-side rendering.  The JSON is made
    safe for inclusion in HTML: ``<``, ``>``, ``&`` and ``'`` are written
    as ``\\uXXXX`` escapes, which parse back to the same characters but
    cannot close a ``<script>`` element or open markup.

    Returns the rendered ``interactive.html`` template as a string.
    """
    from codedocent.server import _node_to_dict  # pylint: disable=import-outside-toplevel,cyclic-import  # noqa: E501

    template_dir = Path(__file__).parent / "templates"
    env = Environment(
        loader=FileSystemLoader(str(template_dir)),
        autoescape=False,  # nosec B701 — we embed raw JSON
    )
    template = env.get_template("interactive.html")

    # NOTE(review): presumably the template places tree_json inside a
    # <script> element — confirm against interactive.html.  Autoescape is
    # off, so text taken from scanned files (e.g. one containing
    # "</script>") would otherwise reach the page verbatim.  These
    # characters only occur inside JSON string values, so escaping them
    # does not change the decoded data.
    tree_json = (
        json.dumps(_node_to_dict(root))
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
        .replace("&", "\\u0026")
        .replace("'", "\\u0027")
    )
    return template.render(tree_json=tree_json)