PyPI - codedocent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codedocent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

codedocent/__init__.py +1 -0
codedocent/__main__.py +4 -0
codedocent/analyzer.py +620 -0
codedocent/cli.py +132 -0
codedocent/editor.py +85 -0
codedocent/parser.py +369 -0
codedocent/renderer.py +79 -0
codedocent/scanner.py +135 -0
codedocent/server.py +304 -0
codedocent/templates/base.html +538 -0
codedocent/templates/interactive.html +1032 -0
codedocent-0.1.0.dist-info/METADATA +16 -0
codedocent-0.1.0.dist-info/RECORD +17 -0
codedocent-0.1.0.dist-info/WHEEL +5 -0
codedocent-0.1.0.dist-info/entry_points.txt +2 -0
codedocent-0.1.0.dist-info/licenses/LICENSE +21 -0
codedocent-0.1.0.dist-info/top_level.txt +1 -0

codedocent/cli.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""CLI for codedocent: scan, parse, and render code visualizations."""
+from __future__ import annotations
+import argparse
+from codedocent.parser import CodeNode, parse_directory
+from codedocent.scanner import scan_directory
+def print_tree(node: CodeNode, indent: int = 0) -> None:
+    """Write *node* and all of its descendants to stdout as an outline.
+    Directories and files show their line count (files also show their
+    language and imports when present); every other node type shows its
+    ``L<start>-<end>`` span.  Each nesting level is indented two spaces.
+    """
+    kind = node.node_type
+    tag = kind.upper()
+    if kind == "directory":
+        text = f"{tag}: {node.name}/  ({node.line_count} lines)"
+    elif kind == "file":
+        fields = [f"{tag}: {node.name}"]
+        if node.language:
+            fields.append(f"[{node.language}]")
+        fields.append(f"({node.line_count} lines)")
+        if node.imports:
+            fields.append("imports: " + ", ".join(node.imports))
+        text = " ".join(fields)
+    else:
+        span = f"L{node.start_line}-{node.end_line}"
+        text = f"{tag}: {node.name}  ({span}, {node.line_count} lines)"
+    print("  " * indent + text)
+    for sub in node.children:
+        print_tree(sub, indent + 1)
+def main() -> None:
+    """Run the codedocent command-line interface.
+    Parses the command line, scans and parses the target directory, then
+    runs exactly one mode, checked in this order: ``--text`` (print the
+    tree), ``--no-ai`` (static HTML without AI), ``--full`` (static HTML
+    with upfront AI analysis), otherwise the interactive server.
+    """
+    cli = argparse.ArgumentParser(
+        prog="codedocent",
+        description="Code visualization for non-programmers",
+    )
+    cli.add_argument("path", help="Path to the directory to scan")
+    cli.add_argument(
+        "--text",
+        action="store_true",
+        help="Print text tree instead of generating HTML",
+    )
+    cli.add_argument(
+        "-o",
+        "--output",
+        default="codedocent_output.html",
+        help="HTML output file path (default: codedocent_output.html)",
+    )
+    cli.add_argument(
+        "--model",
+        default="qwen3:14b",
+        help="Ollama model for AI summaries (default: qwen3:14b)",
+    )
+    cli.add_argument(
+        "--no-ai",
+        action="store_true",
+        help="Skip AI analysis, render with placeholders",
+    )
+    cli.add_argument(
+        "--full",
+        action="store_true",
+        help="Analyze everything upfront (priority-batched), write static HTML",
+    )
+    cli.add_argument(
+        "--port",
+        type=int,
+        default=None,
+        help="Port for the interactive server (default: auto-select from 8420)",
+    )
+    cli.add_argument(
+        "--workers",
+        type=int,
+        default=1,
+        help="Number of parallel AI workers for --full mode (default: 1)",
+    )
+    opts = cli.parse_args()
+    tree = parse_directory(scan_directory(opts.path), root=opts.path)
+    # Imports below stay inside each mode so that only the modules the
+    # selected mode needs are loaded.
+    if opts.text:
+        # Text mode: quality score only, print tree
+        from codedocent.analyzer import analyze_no_ai  # pylint: disable=import-outside-toplevel  # noqa: E501
+        analyze_no_ai(tree)
+        print_tree(tree)
+        return
+    if opts.no_ai:
+        # No-AI mode: quality score only, static HTML
+        from codedocent.analyzer import analyze_no_ai  # pylint: disable=import-outside-toplevel  # noqa: E501
+        from codedocent.renderer import render  # pylint: disable=import-outside-toplevel  # noqa: E501
+        analyze_no_ai(tree)
+        render(tree, opts.output)
+        print(f"HTML output written to {opts.output}")
+        return
+    if opts.full:
+        # Full mode: upfront AI analysis, static HTML
+        from codedocent.analyzer import analyze  # pylint: disable=import-outside-toplevel  # noqa: E501
+        from codedocent.renderer import render  # pylint: disable=import-outside-toplevel  # noqa: E501
+        analyze(tree, model=opts.model, workers=opts.workers)
+        render(tree, opts.output)
+        print(f"HTML output written to {opts.output}")
+        return
+    # Default lazy mode: interactive server
+    from codedocent.analyzer import analyze_no_ai, assign_node_ids  # pylint: disable=import-outside-toplevel  # noqa: E501
+    from codedocent.server import start_server  # pylint: disable=import-outside-toplevel  # noqa: E501
+    analyze_no_ai(tree)
+    start_server(
+        tree,
+        assign_node_ids(tree),
+        model=opts.model,
+        port=opts.port,
+    )
+if __name__ == "__main__":
+    main()

codedocent/editor.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""Code replacement: write modified source back into a file."""
+from __future__ import annotations
+import os
+import shutil
+def _line_ending(line: str) -> str:
+    """Return the newline sequence that terminates *line*, or ``""``."""
+    for ending in ("\r\n", "\n", "\r"):
+        if line.endswith(ending):
+            return ending
+    return ""
+def replace_block_source(
+    filepath: str,
+    start_line: int,
+    end_line: int,
+    new_source: str,
+) -> dict:
+    """Replace lines *start_line* through *end_line* (1-indexed, inclusive).
+    Creates a ``.bak`` backup before writing.  Returns a result dict with
+    ``success``, ``lines_before``, ``lines_after`` on success, or
+    ``success=False`` and ``error`` on failure.
+    Lines outside the replaced range are written back unchanged, including
+    their original line terminators.  The inserted lines use the terminator
+    of the first replaced line (falling back to the first terminator found
+    in the file, then ``"\\n"``).  An empty *new_source* deletes the range.
+    A file that cannot be read, decoded as UTF-8, or written yields an
+    error result rather than an exception.
+    """
+    # --- input validation ---
+    if not os.path.isfile(filepath):
+        return {"success": False, "error": f"File not found: {filepath}"}
+    range_ok = (
+        isinstance(start_line, int)
+        and isinstance(end_line, int)
+        and 1 <= start_line <= end_line
+    )
+    if not range_ok:
+        return {
+            "success": False,
+            "error": (
+                f"Invalid line range: {start_line}-{end_line}"
+            ),
+        }
+    if not isinstance(new_source, str):
+        return {"success": False, "error": "new_source must be a string"}
+    try:
+        # newline="" disables newline translation so every untouched line
+        # keeps the exact terminator it had on disk.
+        with open(filepath, encoding="utf-8", newline="") as f:
+            lines = f.readlines()
+        if end_line > len(lines):
+            return {
+                "success": False,
+                "error": (
+                    f"end_line {end_line} exceeds file length"
+                    f" ({len(lines)} lines)"
+                ),
+            }
+        old_count = end_line - start_line + 1
+        # Backup (only after the file was read and the range validated)
+        shutil.copy2(filepath, filepath + ".bak")
+        # Pick the terminator for the inserted lines from the file itself.
+        eol = _line_ending(lines[start_line - 1]) or next(
+            (e for e in map(_line_ending, lines) if e), "\n"
+        )
+        # Build replacement lines
+        if new_source == "":
+            new_lines: list[str] = []
+        else:
+            text = new_source.replace("\r\n", "\n")
+            # A trailing newline ends the last line; it does not start an
+            # extra blank one.
+            if text.endswith("\n"):
+                text = text[:-1]
+            new_lines = [ln + eol for ln in text.split("\n")]
+        new_count = len(new_lines)
+        lines[start_line - 1:end_line] = new_lines
+        with open(filepath, "w", encoding="utf-8", newline="") as f:
+            f.writelines(lines)
+        return {
+            "success": True,
+            "lines_before": old_count,
+            "lines_after": new_count,
+        }
+    except (OSError, UnicodeError) as exc:
+        # UnicodeError covers files that are not valid UTF-8.
+        return {"success": False, "error": str(exc)}

codedocent/parser.py ADDED Viewed

@@ -0,0 +1,369 @@
+"""Parse source files into a tree of CodeNodes using tree-sitter."""
+from __future__ import annotations
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+import tree_sitter_language_pack as tslp
+from codedocent.scanner import ScannedFile
+@dataclass
+class CodeNode:  # pylint: disable=too-many-instance-attributes
+    """One element of the parsed code tree.
+    A node is a directory, a file, or a definition found inside a file.
+    The first seven fields are required; the remaining ones default to
+    empty/``None`` values.
+    """
+    # --- required, positional ---
+    name: str
+    # One of: 'directory' | 'file' | 'class' | 'function' | 'method'
+    node_type: str
+    language: str | None
+    filepath: str | None
+    # Line span; both ends are 1-indexed and end_line is inclusive.
+    start_line: int
+    end_line: int
+    # Actual source code of this node.
+    source: str
+    # --- structure ---
+    children: list[CodeNode] = field(default_factory=list)
+    imports: list[str] = field(default_factory=list)
+    line_count: int = 0
+    # --- filled in by the analyzer later ---
+    summary: str | None = None
+    pseudocode: str | None = None
+    # One of: 'clean' | 'complex' | 'warning'
+    quality: str | None = None
+    warnings: list[str] | None = None
+    node_id: str | None = None
+# ---------------------------------------------------------------------------
+# Language-specific AST extraction rules
+# ---------------------------------------------------------------------------
+# Each rules table is keyed by a tree-sitter node type and maps it to a
+# pair: (our node_type value, type of the child node that holds the name).
+_PYTHON_RULES: dict[str, tuple[str, str]] = dict(
+    function_definition=("function", "identifier"),
+    class_definition=("class", "identifier"),
+)
+_JS_TS_RULES: dict[str, tuple[str, str]] = dict(
+    function_declaration=("function", "identifier"),
+    class_declaration=("class", "identifier"),
+)
+# Per language: the node type that holds the body / children of a class.
+_CLASS_BODY_TYPES: dict[str, str] = {
+    "python": "block",
+    **dict.fromkeys(("javascript", "typescript", "tsx"), "class_body"),
+}
+# Per language: method node types found inside a class body, each mapped
+# to the type of the child node that holds the method name.
+_METHOD_TYPES: dict[str, dict[str, str]] = {
+    "python": {"function_definition": "identifier"},
+    **{
+        lang: {"method_definition": "property_identifier"}
+        for lang in ("javascript", "typescript", "tsx")
+    },
+}
+def _rules_for(language: str) -> dict[str, tuple[str, str]]:
+    """Look up the AST extraction table for *language*.
+    Returns an empty dict for any language that has no table.
+    """
+    if language in ("javascript", "typescript", "tsx"):
+        return _JS_TS_RULES
+    return _PYTHON_RULES if language == "python" else {}
+# ---------------------------------------------------------------------------
+# Import extraction
+# ---------------------------------------------------------------------------
+def _extract_imports_python(root_node) -> list[str]:
+    """Extract imported module names from a Python AST.
+    Only direct children of *root_node* are inspected.  For
+    ``import a, b.c as d`` every module name is collected (``a``, ``b.c``);
+    the alias is ignored.  For ``from m import x`` only the module ``m`` is
+    collected, and for a relative import the dotted prefix is kept
+    (``from .pkg import x`` yields ``.pkg``).
+    """
+    imports: list[str] = []
+    for stmt in root_node.children:
+        if stmt.type == "import_statement":
+            for item in stmt.children:
+                name_node = item
+                if item.type == "aliased_import":
+                    # ``import mod as alias`` nests the module name one
+                    # level down, inside the aliased_import node.
+                    name_node = next(
+                        (c for c in item.children if c.type == "dotted_name"),
+                        None,
+                    )
+                if name_node is not None and name_node.type == "dotted_name":
+                    imports.append(name_node.text.decode())
+        elif stmt.type == "import_from_statement":
+            for item in stmt.children:
+                # The module comes first; for ``from . import x`` it is a
+                # relative_import node, not a dotted_name.
+                if item.type in ("dotted_name", "relative_import"):
+                    imports.append(item.text.decode())
+                    break  # only the module name, not the imported symbols
+    return imports
+def _extract_imports_js(root_node) -> list[str]:
+    """Collect the module paths named by top-level JS/TS import statements.
+    Every ``string`` child of each ``import_statement`` directly under
+    *root_node* is returned with its surrounding quote characters removed.
+    """
+    return [
+        piece.text.decode().strip("'\"")
+        for stmt in root_node.children
+        if stmt.type == "import_statement"
+        for piece in stmt.children
+        if piece.type == "string"
+    ]
+def _extract_imports(root_node, language: str) -> list[str]:
+    """Pick the import extractor that matches *language* and run it.
+    Languages without an extractor yield an empty list.
+    """
+    if language in ("javascript", "typescript", "tsx"):
+        return _extract_imports_js(root_node)
+    if language != "python":
+        return []
+    return _extract_imports_python(root_node)
+# ---------------------------------------------------------------------------
+# Arrow-function extraction (JS/TS)
+# ---------------------------------------------------------------------------
+def _extract_arrow_functions(root_node, language: str) -> list[CodeNode]:
+    """Collect top-level ``const name = () => ...`` style declarations.
+    Only JS/TS languages are handled; anything else yields an empty list.
+    Each match becomes a ``function`` node spanning the whole lexical
+    declaration, with ``filepath`` left as ``None`` for the caller to set.
+    """
+    if language not in ("javascript", "typescript", "tsx"):
+        return []
+    found: list[CodeNode] = []
+    declarations = (
+        c for c in root_node.children if c.type == "lexical_declaration"
+    )
+    for declaration in declarations:
+        for declarator in declaration.children:
+            if declarator.type != "variable_declarator":
+                continue
+            pieces = declarator.children
+            identifiers = [p for p in pieces if p.type == "identifier"]
+            # When several identifiers are present the last one wins.
+            name_node = identifiers[-1] if identifiers else None
+            has_arrow = any(p.type == "arrow_function" for p in pieces)
+            if not (name_node and has_arrow):
+                continue
+            first_row = declaration.start_point[0]
+            last_row = declaration.end_point[0]
+            found.append(CodeNode(
+                name=name_node.text.decode(),
+                node_type="function",
+                language=language,
+                filepath=None,  # filled by caller
+                start_line=first_row + 1,
+                end_line=last_row + 1,
+                source=declaration.text.decode(),
+                line_count=last_row - first_row + 1,
+            ))
+    return found
+# ---------------------------------------------------------------------------
+# Name extraction helper
+# ---------------------------------------------------------------------------
+def _find_child_text(node, child_type: str) -> str:
+    """Return the decoded text of *node*'s first child of type *child_type*.
+    Falls back to the literal ``"<anonymous>"`` when no child matches.
+    """
+    match = next((c for c in node.children if c.type == child_type), None)
+    if match is None:
+        return "<anonymous>"
+    return match.text.decode()
+# ---------------------------------------------------------------------------
+# Method extraction from class body
+# ---------------------------------------------------------------------------
+def _extract_methods(class_node, language: str) -> list[CodeNode]:
+    """Extract method nodes from a class body.
+    *class_node* is the tree-sitter class node; its body is the first child
+    whose type matches ``_CLASS_BODY_TYPES[language]``.  Returns an empty
+    list when the language has no class-body/method tables or the class
+    has no body child.  Decorated Python methods (``@property``,
+    ``@staticmethod``, ...) are included; their span and source cover the
+    decorators as well.  ``filepath`` is left as ``None`` for the caller.
+    """
+    body_type = _CLASS_BODY_TYPES.get(language)
+    method_map = _METHOD_TYPES.get(language, {})
+    if not body_type or not method_map:
+        return []
+    body = next(
+        (c for c in class_node.children if c.type == body_type), None
+    )
+    if body is None:
+        return []
+    methods: list[CodeNode] = []
+    for member in body.children:
+        definition = member
+        if member.type == "decorated_definition":
+            # tree-sitter-python wraps a decorated def in this node; the
+            # actual method definition is one of its children.
+            definition = next(
+                (c for c in member.children if c.type in method_map), None
+            )
+        if definition is None or definition.type not in method_map:
+            continue
+        first_row = member.start_point[0]
+        last_row = member.end_point[0]
+        methods.append(CodeNode(
+            name=_find_child_text(definition, method_map[definition.type]),
+            node_type="method",
+            language=language,
+            filepath=None,
+            start_line=first_row + 1,
+            end_line=last_row + 1,
+            source=member.text.decode() if member.text else "",
+            line_count=last_row - first_row + 1,
+        ))
+    return methods
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def parse_file(  # pylint: disable=too-many-locals
+    filepath: str, language: str, source: str | None = None,
+) -> CodeNode:
+    """Parse a single source file and return a file-level CodeNode.
+    If *source* is provided it is used directly; otherwise the file is read
+    from disk as UTF-8.
+    The returned node always describes the whole file.  When *language*
+    has extraction rules and a tree-sitter parser is available, the node
+    also gets its imports and one child per top-level class, function and
+    (JS/TS) arrow function, ordered by start line; classes carry their
+    methods as children.  Decorated Python definitions are included and
+    their span covers the decorators.  Otherwise the bare file node is
+    returned.
+    """
+    if source is None:
+        with open(filepath, encoding="utf-8") as f:
+            source = f.read()
+    source_bytes = source.encode()
+    line_count = len(source.splitlines())
+    # Build the file-level node
+    file_node = CodeNode(
+        name=os.path.basename(filepath),
+        node_type="file",
+        language=language,
+        filepath=filepath,
+        start_line=1,
+        end_line=line_count,
+        source=source,
+        line_count=line_count,
+    )
+    # Languages we can parse with tree-sitter
+    rules = _rules_for(language)
+    if not rules:
+        return file_node
+    try:
+        parser = tslp.get_parser(language)  # type: ignore[arg-type]
+    except (KeyError, ValueError):
+        return file_node
+    root = parser.parse(source_bytes).root_node
+    # Extract imports
+    file_node.imports = _extract_imports(root, language)
+    # Walk top-level children for classes/functions
+    for child in root.children:
+        definition = child
+        if child.type == "decorated_definition":
+            # tree-sitter-python wraps ``@decorator`` + def/class in this
+            # node; the real definition is one of its children.
+            definition = next(
+                (c for c in child.children if c.type in rules), None
+            )
+        if definition is None or definition.type not in rules:
+            continue
+        our_type, name_child = rules[definition.type]
+        first_row = child.start_point[0]
+        last_row = child.end_point[0]
+        node = CodeNode(
+            name=_find_child_text(definition, name_child),
+            node_type=our_type,
+            language=language,
+            filepath=filepath,
+            start_line=first_row + 1,
+            end_line=last_row + 1,
+            source=child.text.decode() if child.text else "",
+            line_count=last_row - first_row + 1,
+        )
+        # If it's a class, extract methods as children
+        if our_type == "class":
+            node.children = _extract_methods(definition, language)
+            for method in node.children:
+                method.filepath = filepath
+        file_node.children.append(node)
+    # Arrow functions (JS/TS)
+    arrows = _extract_arrow_functions(root, language)
+    for arrow in arrows:
+        arrow.filepath = filepath
+    file_node.children.extend(arrows)
+    # Sort children by start_line
+    file_node.children.sort(key=lambda n: n.start_line)
+    return file_node
+def parse_directory(  # pylint: disable=too-many-locals
+    scanned_files: list[ScannedFile],
+    root: str | None = None,
+) -> CodeNode:
+    """Assemble scanner output into one tree rooted at a directory node.
+    *root* is the base directory the scanned paths are joined onto; when
+    omitted, ``"."`` is used.  Every file is parsed with ``parse_file`` and
+    stored under its scanner-supplied path.  Directory nodes list
+    sub-directories before files, each group sorted by name, and carry the
+    summed line count of everything below them.
+    """
+    def _new_dir(name: str, path: str) -> CodeNode:
+        return CodeNode(
+            name=name,
+            node_type="directory",
+            language=None,
+            filepath=path,
+            start_line=0,
+            end_line=0,
+            source="",
+            line_count=0,
+        )
+    base = str(Path("." if root is None else root).resolve())
+    top = _new_dir(os.path.basename(base) or base, base)
+    # Directory nodes by relative path; "" is the root itself.
+    by_key: dict[str, CodeNode] = {"": top}
+    for entry in scanned_files:
+        segments = Path(entry.filepath).parts
+        # Walk the directory part of the path, creating missing nodes.
+        parent_key = ""
+        for segment in segments[:-1]:
+            key = os.path.join(parent_key, segment) if parent_key else segment
+            if key not in by_key:
+                created = _new_dir(segment, os.path.join(base, key))
+                by_key[parent_key].children.append(created)
+                by_key[key] = created
+            parent_key = key
+        # Parse from the absolute path, then store the relative one.
+        parsed = parse_file(os.path.join(base, entry.filepath), entry.language)
+        parsed.filepath = entry.filepath
+        by_key[parent_key].children.append(parsed)
+    # Order every directory's children: dirs first, then files, by name.
+    pending = [top]
+    while pending:
+        current = pending.pop()
+        current.children.sort(
+            key=lambda n: (int(n.node_type != "directory"), n.name)
+        )
+        pending.extend(
+            c for c in current.children if c.node_type == "directory"
+        )
+    # Roll line counts up from files into their enclosing directories.
+    def _total_lines(node: CodeNode) -> int:
+        if node.node_type != "directory":
+            return node.line_count
+        node.line_count = sum(_total_lines(c) for c in node.children)
+        return node.line_count
+    _total_lines(top)
+    return top

codedocent/renderer.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""Render a CodeNode tree as a self-contained HTML file."""
+from __future__ import annotations
+import json
+import os
+from pathlib import Path
+from jinja2 import Environment, FileSystemLoader
+from codedocent.parser import CodeNode
+# Hex color per language name; languages in one group share a color.
+LANGUAGE_COLORS: dict[str, str] = {
+    "python": "#3572A5",
+    **dict.fromkeys(("javascript", "typescript", "tsx"), "#F0DB4F"),
+    **dict.fromkeys(("c", "cpp"), "#2E8B57"),
+    "rust": "#DEA584",
+    "go": "#00ADD8",
+    "html": "#E34C26",
+    "css": "#563D7C",
+    **dict.fromkeys(("json", "yaml", "toml"), "#999999"),
+}
+# Color used when a node has no language or the language is not listed.
+DEFAULT_COLOR = "#CCCCCC"
+# Icon character per node type (functions and methods share one).
+NODE_ICONS: dict[str, str] = dict(
+    directory="\U0001f4c1",
+    file="\U0001f4c4",
+    **{"class": "\U0001f537"},
+    function="\u26a1",
+    method="\u26a1",
+)
+def _get_color(node: CodeNode) -> str:
+    """Pick the display color for *node* from its language.
+    Nodes without a language, or with one missing from
+    ``LANGUAGE_COLORS``, get ``DEFAULT_COLOR``.
+    """
+    language = node.language
+    if language is not None and language in LANGUAGE_COLORS:
+        return LANGUAGE_COLORS[language]
+    return DEFAULT_COLOR
+def render(root: CodeNode, output_path: str) -> None:
+    """Write *root* to *output_path* as HTML using the ``base.html`` template.
+    The template is loaded from the ``templates`` directory next to this
+    module, with autoescaping on and ``get_color`` / ``NODE_ICONS``
+    available as globals.  Missing parent directories of *output_path* are
+    created first.
+    """
+    templates = Path(__file__).parent / "templates"
+    env = Environment(
+        loader=FileSystemLoader(str(templates)),
+        autoescape=True,
+    )
+    env.globals.update(get_color=_get_color, NODE_ICONS=NODE_ICONS)
+    page = env.get_template("base.html").render(root=root)
+    target_dir = os.path.dirname(os.path.abspath(output_path))
+    os.makedirs(target_dir, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as out:
+        out.write(page)
+def render_interactive(root: CodeNode) -> str:
+    """Render *root* as interactive HTML string (served by localhost server).
+    Embeds the tree as JSON for client-side rendering.  Because the
+    template is rendered with autoescaping off, the characters ``<``,
+    ``>``, ``&`` and ``'`` in the JSON are emitted as ``\\uXXXX`` escapes.
+    The decoded data is unchanged, but scanned source text such as
+    ``</script>`` or ``<!--`` can no longer terminate or corrupt the
+    surrounding markup.
+    """
+    from codedocent.server import _node_to_dict  # pylint: disable=import-outside-toplevel,cyclic-import  # noqa: E501
+    template_dir = Path(__file__).parent / "templates"
+    env = Environment(
+        loader=FileSystemLoader(str(template_dir)),
+        autoescape=False,  # nosec B701 — we embed raw JSON
+    )
+    template = env.get_template("interactive.html")
+    # NOTE(review): presumably the template places tree_json inside a
+    # <script> element (template not visible here); the escapes below are
+    # valid in both JSON and JavaScript string literals.
+    tree_json = (
+        json.dumps(_node_to_dict(root))
+        .replace("<", "\\u003c")
+        .replace(">", "\\u003e")
+        .replace("&", "\\u0026")
+        .replace("'", "\\u0027")
+    )
+    return template.render(tree_json=tree_json)