codegraph-ai 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/PKG-INFO +1 -1
  2. codegraph_ai-0.2.2/codegraph/adapters/java_adapter.py +555 -0
  3. codegraph_ai-0.2.2/codegraph/adapters/python_adapter.py +692 -0
  4. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/cli.py +2 -1
  5. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/core.py +107 -1
  6. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/models.py +17 -0
  7. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/PKG-INFO +1 -1
  8. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/SOURCES.txt +2 -0
  9. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/pyproject.toml +1 -1
  10. codegraph_ai-0.2.2/tests/test_java_adapter.py +341 -0
  11. codegraph_ai-0.2.0/codegraph/adapters/python_adapter.py +0 -337
  12. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/README.md +0 -0
  13. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/__init__.py +0 -0
  14. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/__main__.py +0 -0
  15. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/__init__.py +0 -0
  16. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/base.py +0 -0
  17. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/c_adapter.py +0 -0
  18. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/js_adapter.py +0 -0
  19. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/analyzer.py +0 -0
  20. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/bug_locator.py +0 -0
  21. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/bug_parser.py +0 -0
  22. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/github_client.py +0 -0
  23. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/issue_cache.py +0 -0
  24. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/issue_fetcher.py +0 -0
  25. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/mcp_server.py +0 -0
  26. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/qa.py +0 -0
  27. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/dependency_links.txt +0 -0
  28. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/entry_points.txt +0 -0
  29. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/requires.txt +0 -0
  30. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/top_level.txt +0 -0
  31. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/setup.cfg +0 -0
  32. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_adapters.py +0 -0
  33. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_advanced.py +0 -0
  34. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_bug_locator.py +0 -0
  35. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_bug_parser.py +0 -0
  36. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_core_schema.py +0 -0
  37. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_cross_locate.py +0 -0
  38. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_impact.py +0 -0
  39. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_incremental.py +0 -0
  40. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_indexing.py +0 -0
  41. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_integration.py +0 -0
  42. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_issue_cache.py +0 -0
  43. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_js_adapter.py +0 -0
  44. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_models.py +0 -0
  45. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_similar.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraph-ai
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Hybrid graph + vector code intelligence powered by NeuG and zvec
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: neug
@@ -0,0 +1,555 @@
1
+ """Java source code adapter using tree-sitter.
2
+
3
+ Handles ``.java`` files.
4
+ Extracts:
5
+ - Class, interface, and enum definitions with inheritance
6
+ - Method and constructor definitions with full generic signatures
7
+ - Method invocations with receiver context
8
+ - Import statements (single, wildcard, static)
9
+ - Annotations on classes and methods
10
+ - JavaDoc comments
11
+ - Inner classes (prefixed with outer class name)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from tree_sitter_language_pack import get_parser
17
+
18
+ from codegraph.adapters.base import BaseAdapter
19
+ from codegraph.models import (
20
+ CallInfo,
21
+ ParsedClass,
22
+ ParsedFunction,
23
+ ParsedImport,
24
+ ParseResult,
25
+ )
26
+
27
+
28
+ def _node_text(node) -> str:
29
+ """Return the UTF-8 text of a tree-sitter node."""
30
+ return node.text.decode("utf-8") if node and node.text else ""
31
+
32
+
33
def _extract_javadoc(node) -> str:
    """Return the JavaDoc summary immediately preceding *node*.

    Looks only at the node's direct previous sibling: if it is a block
    comment opening with ``/**``, the delimiters and leading ``*``s are
    stripped and the description lines (everything before the first
    ``@tag``) are joined into a single string.  Anything else yields an
    empty string.
    """
    comment = node.prev_sibling
    if comment is None or comment.type != "block_comment":
        return ""
    raw = _node_text(comment)
    if not raw.startswith("/**"):
        return ""
    body = raw[3:]
    if body.endswith("*/"):
        body = body[:-2]
    summary: list[str] = []
    for raw_line in body.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("*"):
            stripped = stripped[1:].strip()
        if stripped.startswith("@"):
            # Stop at the first JavaDoc tag (@param, @return, ...).
            break
        if stripped:
            summary.append(stripped)
    return " ".join(summary)
59
+
60
+
61
def _extract_annotations(node) -> list[str]:
    """Collect annotation names declared in a node's ``modifiers`` child."""
    modifiers = next(
        (child for child in node.children if child.type == "modifiers"),
        None,
    )
    if modifiers is None:
        return []
    names: list[str] = []
    for entry in modifiers.children:
        if entry.type not in ("marker_annotation", "annotation"):
            continue
        name_node = entry.child_by_field_name("name")
        if name_node:
            names.append(_node_text(name_node))
    return names
77
+
78
+
79
def _extract_modifiers(node) -> list[str]:
    """Return modifier keywords (``public``, ``static``, ...) from a node.

    Scans the node's ``modifiers`` child and keeps plain modifier
    keywords; annotations, which also appear under ``modifiers``, are
    handled separately by ``_extract_annotations``.

    Fix: the keyword list now includes the Java 17 ``sealed`` and
    ``non-sealed`` class modifiers, which were previously dropped from
    reconstructed signatures.
    """
    mods: list[str] = []
    for child in node.children:
        if child.type != "modifiers":
            continue
        for mc in child.children:
            if mc.type in (
                "public", "private", "protected", "static",
                "final", "abstract", "synchronized", "native",
                "default", "transient", "volatile", "strictfp",
                "sealed", "non-sealed",
            ):
                mods.append(_node_text(mc))
    return mods
92
+
93
+
94
def _build_method_signature(method_node) -> str:
    """Render a human-readable signature for a ``method_declaration`` node.

    Example output: ``public static <T> List<T> sort(List<T> items)``
    """
    pieces: list[str] = []

    modifiers = _extract_modifiers(method_node)
    if modifiers:
        pieces.append(" ".join(modifiers))

    # Optional generic type parameters, then the return type.
    for field in ("type_parameters", "type"):
        part = method_node.child_by_field_name(field)
        if part:
            pieces.append(_node_text(part))

    name_node = method_node.child_by_field_name("name")
    params_node = method_node.child_by_field_name("parameters")
    name = _node_text(name_node) if name_node else "?"
    params = _node_text(params_node) if params_node else "()"
    pieces.append(name + params)

    return " ".join(pieces)
120
+
121
+
122
def _build_constructor_signature(ctor_node) -> str:
    """Render a human-readable signature for a ``constructor_declaration``."""
    modifiers = _extract_modifiers(ctor_node)
    name_node = ctor_node.child_by_field_name("name")
    params_node = ctor_node.child_by_field_name("parameters")
    head = _node_text(name_node) if name_node else "?"
    tail = _node_text(params_node) if params_node else "()"
    signature = f"{head}{tail}"
    if modifiers:
        return f'{" ".join(modifiers)} {signature}'
    return signature
134
+
135
+
136
def _collect_calls(node, calls: list[CallInfo]) -> None:
    """Collect method invocations and constructor calls from a Java subtree.

    Appends a :class:`CallInfo` for every ``method_invocation`` (with the
    receiver reduced to its last ``.``-separated segment) and for every
    ``object_creation_expression`` (recorded as ``Type.<init>`` with any
    generic arguments stripped from the type name).

    Fix: traversal now uses an explicit stack instead of recursion, so
    deeply nested expressions or blocks cannot overflow Python's
    recursion limit.  Nodes are visited in the same pre-order as the
    previous recursive implementation, preserving call order.
    """
    stack = [node]
    while stack:
        current = stack.pop()
        if current.type == "method_invocation":
            name_node = current.child_by_field_name("name")
            obj_node = current.child_by_field_name("object")
            callee = _node_text(name_node) if name_node else ""
            receiver = None
            if obj_node:
                receiver = _node_text(obj_node)
                if "." in receiver:
                    # Keep only the innermost receiver: a.b.c() -> c.
                    receiver = receiver.rsplit(".", 1)[-1]
            if callee:
                calls.append(CallInfo(
                    callee_name=callee,
                    receiver=receiver,
                    raw_expression=_node_text(current).split("(")[0],
                ))
        elif current.type == "object_creation_expression":
            type_node = current.child_by_field_name("type")
            if type_node:
                type_text = _node_text(type_node)
                if "<" in type_text:
                    # Drop generic arguments: ArrayList<String> -> ArrayList.
                    type_text = type_text[:type_text.index("<")]
                calls.append(CallInfo(
                    callee_name=f"{type_text}.<init>",
                    receiver=None,
                    raw_expression=_node_text(current).split("(")[0],
                ))
        # Push children reversed so the left-most child is processed first,
        # matching recursive pre-order traversal.
        stack.extend(reversed(current.children))
167
+
168
+
169
class JavaAdapter(BaseAdapter):
    """Extract classes, methods, calls and imports from Java files.

    Parses sources with the tree-sitter Java grammar and flattens the
    syntax tree into the language-neutral ``Parsed*`` models.  Nested
    (inner) types are emitted with dotted display names such as
    ``Outer.Inner``.
    """

    def __init__(self) -> None:
        # The parser instance is reused across parse_file() calls.
        self._parser = get_parser("java")

    def language_name(self) -> str:
        """Return this adapter's language identifier."""
        return "java"

    def supported_extensions(self) -> list[str]:
        """Return the file extensions this adapter handles."""
        return [".java"]

    def parse_file(self, source: bytes, file_path: str) -> ParseResult:
        """Parse *source* and return extracted functions, classes, imports.

        The package declaration (if present) is read first so that the
        extraction helpers receive it, then the top-level declarations
        are walked.
        """
        tree = self._parser.parse(source)
        root = tree.root_node

        functions: list[ParsedFunction] = []
        classes: list[ParsedClass] = []
        imports: list[ParsedImport] = []
        package = ""

        for child in root.children:
            if child.type == "package_declaration":
                package = self._extract_package(child)

        self._walk(root, file_path, package, functions, classes, imports,
                   outer_class=None)
        return ParseResult(functions=functions, classes=classes, imports=imports)

    # -- top-level walk -------------------------------------------------------

    def _walk(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Dispatch each top-level declaration to the matching extractor."""
        for child in node.children:
            if child.type == "class_declaration":
                self._extract_class(
                    child, file_path, package, functions, classes, imports,
                    outer_class,
                )
            elif child.type == "interface_declaration":
                self._extract_interface(
                    child, file_path, package, functions, classes, imports,
                    outer_class,
                )
            elif child.type == "enum_declaration":
                self._extract_enum(
                    child, file_path, package, functions, classes, imports,
                    outer_class,
                )
            elif child.type == "import_declaration":
                self._extract_import(child, file_path, imports)

    # -- package --------------------------------------------------------------

    @staticmethod
    def _extract_package(node) -> str:
        """Return the dotted package name from a ``package_declaration``."""
        for child in node.children:
            if child.type == "scoped_identifier" or child.type == "identifier":
                return _node_text(child)
        return ""

    # -- classes / interfaces / enums -----------------------------------------

    def _extract_class(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Record a class declaration (with bases) and recurse into its body."""
        name_node = node.child_by_field_name("name")
        cls_name = _node_text(name_node) if name_node else "Unknown"
        # Inner classes are prefixed with the outer class name.
        display_name = f"{outer_class}.{cls_name}" if outer_class else cls_name

        # tree-sitter rows are 0-based; the models use 1-based lines.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        qualified = f"{file_path}:{display_name}"

        base_classes: list[str] = []
        superclass_node = node.child_by_field_name("superclass")
        if superclass_node:
            for child in superclass_node.children:
                if child.is_named:
                    text = _node_text(child)
                    if "<" in text:
                        # Strip generic arguments: Base<T> -> Base.
                        text = text[:text.index("<")]
                    base_classes.append(text)

        # Implemented interfaces are also recorded as base classes.
        interfaces_node = node.child_by_field_name("interfaces")
        if interfaces_node:
            for child in interfaces_node.children:
                if child.type == "type_list":
                    for tc in child.children:
                        if tc.is_named:
                            text = _node_text(tc)
                            if "<" in text:
                                text = text[:text.index("<")]
                            base_classes.append(text)

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            self._process_class_body(
                body, file_path, package, display_name, functions, classes,
                imports, method_names,
            )

        classes.append(
            ParsedClass(
                name=display_name,
                qualified_name=qualified,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    def _extract_interface(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Record an interface declaration and recurse into its body."""
        name_node = node.child_by_field_name("name")
        iface_name = _node_text(name_node) if name_node else "Unknown"
        display_name = f"{outer_class}.{iface_name}" if outer_class else iface_name

        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        qualified = f"{file_path}:{display_name}"

        # Extended super-interfaces are recorded as base classes.
        base_classes: list[str] = []
        for child in node.children:
            if child.type == "extends_interfaces":
                for tc in child.children:
                    if tc.type == "type_list":
                        for item in tc.children:
                            if item.is_named:
                                text = _node_text(item)
                                if "<" in text:
                                    # Strip generic arguments: Comparable<T> -> Comparable.
                                    text = text[:text.index("<")]
                                base_classes.append(text)

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            self._process_class_body(
                body, file_path, package, display_name, functions, classes,
                imports, method_names,
            )

        classes.append(
            ParsedClass(
                name=display_name,
                qualified_name=qualified,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    def _extract_enum(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Record an enum declaration and recurse into its member declarations."""
        name_node = node.child_by_field_name("name")
        enum_name = _node_text(name_node) if name_node else "Unknown"
        display_name = f"{outer_class}.{enum_name}" if outer_class else enum_name

        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        qualified = f"{file_path}:{display_name}"

        # Enums cannot extend classes; only implemented interfaces apply.
        base_classes: list[str] = []
        interfaces_node = node.child_by_field_name("interfaces")
        if interfaces_node:
            for child in interfaces_node.children:
                if child.type == "type_list":
                    for tc in child.children:
                        if tc.is_named:
                            text = _node_text(tc)
                            if "<" in text:
                                text = text[:text.index("<")]
                            base_classes.append(text)

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            # Methods/constructors of an enum live under the
            # enum_body_declarations node, after the constant list.
            for child in body.children:
                if child.type == "enum_body_declarations":
                    self._process_class_body(
                        child, file_path, package, display_name, functions,
                        classes, imports, method_names,
                    )

        classes.append(
            ParsedClass(
                name=display_name,
                qualified_name=qualified,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    def _process_class_body(
        self,
        body_node,
        file_path: str,
        package: str,
        class_name: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        method_names: list[str],
    ) -> None:
        """Process children of a class/interface/enum body.

        Collects methods and constructors into *functions* (appending
        their names to *method_names*) and recurses into nested type
        declarations with this type as the new ``outer_class``.
        """
        for child in body_node.children:
            if child.type == "method_declaration":
                name = self._extract_method(
                    child, file_path, functions, class_name,
                )
                if name:
                    method_names.append(name)
            elif child.type == "constructor_declaration":
                name = self._extract_constructor(
                    child, file_path, functions, class_name,
                )
                if name:
                    method_names.append(name)
            elif child.type == "class_declaration":
                self._extract_class(
                    child, file_path, package, functions, classes, imports,
                    outer_class=class_name,
                )
            elif child.type == "interface_declaration":
                self._extract_interface(
                    child, file_path, package, functions, classes, imports,
                    outer_class=class_name,
                )
            elif child.type == "enum_declaration":
                self._extract_enum(
                    child, file_path, package, functions, classes, imports,
                    outer_class=class_name,
                )

    # -- methods / constructors -----------------------------------------------

    def _extract_method(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str,
    ) -> str | None:
        """Record a method declaration and return its simple name."""
        name_node = node.child_by_field_name("name")
        name = _node_text(name_node) if name_node else "unknown"
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        qualified = f"{file_path}:{class_name}.{name}"
        sig = _build_method_signature(node)
        doc = _extract_javadoc(node)

        # Abstract/interface methods have no body and therefore no calls.
        body = node.child_by_field_name("body")
        calls: list[CallInfo] = []
        if body:
            _collect_calls(body, calls)

        functions.append(
            ParsedFunction(
                name=name,
                qualified_name=qualified,
                signature=sig,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                doc_comment=doc,
                call_names=[c.callee_name for c in calls],
                calls=calls,
                class_name=class_name,
            )
        )
        return name

    def _extract_constructor(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str,
    ) -> str | None:
        """Record a constructor under the JVM-style name ``<init>``."""
        name = "<init>"
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        qualified = f"{file_path}:{class_name}.<init>"
        sig = _build_constructor_signature(node)
        doc = _extract_javadoc(node)

        body = node.child_by_field_name("body")
        calls: list[CallInfo] = []
        if body:
            _collect_calls(body, calls)

        functions.append(
            ParsedFunction(
                name=name,
                qualified_name=qualified,
                signature=sig,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                doc_comment=doc,
                call_names=[c.callee_name for c in calls],
                calls=calls,
                class_name=class_name,
            )
        )
        return name

    # -- imports --------------------------------------------------------------

    def _extract_import(
        self,
        node,
        file_path: str,
        imports: list[ParsedImport],
    ) -> None:
        """Record an import declaration (single, wildcard, or static)."""
        is_static = False
        is_wildcard = False
        fqn_parts: list[str] = []

        for child in node.children:
            if child.type == "static":
                is_static = True
            elif child.type == "asterisk":
                is_wildcard = True
            elif child.type == "scoped_identifier" or child.type == "identifier":
                fqn_parts.append(_node_text(child))

        # No identifier found: malformed import, nothing to record.
        if not fqn_parts:
            return

        target_module = fqn_parts[0]

        if is_wildcard:
            imported_names = ["*"]
        else:
            # Last dotted segment is the imported simple name.
            simple_name = target_module.rsplit(".", 1)[-1] if "." in target_module else target_module
            imported_names = [simple_name]

        # NOTE(review): is_static is parsed but not stored on ParsedImport —
        # confirm whether the model should carry a static-import flag.
        imports.append(
            ParsedImport(
                source_path=file_path,
                target_module=target_module,
                imported_names=imported_names,
                is_relative=False,
            )
        )