PyPI - codemarp - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codemarp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

codemarp/__init__.py +1 -0
codemarp/analyzers/__init__.py +1 -0
codemarp/analyzers/high_level.py +77 -0
codemarp/analyzers/low_level.py +272 -0
codemarp/analyzers/mid_level.py +162 -0
codemarp/cli/__init__.py +1 -0
codemarp/cli/main.py +159 -0
codemarp/errors.py +22 -0
codemarp/exporters/__init__.py +1 -0
codemarp/exporters/json_exporter.py +9 -0
codemarp/exporters/mermaid.py +116 -0
codemarp/graph/__init__.py +1 -0
codemarp/graph/builder.py +39 -0
codemarp/graph/models.py +71 -0
codemarp/parser/__init__.py +1 -0
codemarp/parser/js_parser.py +3 -0
codemarp/parser/python_parser.py +282 -0
codemarp/pipeline/apply_view.py +40 -0
codemarp/pipeline/build_bundle.py +54 -0
codemarp/pipeline/export_all.py +80 -0
codemarp/views/module_view.py +16 -0
codemarp/views/subgraph.py +27 -0
codemarp/views/trace.py +96 -0
codemarp-0.1.0.dist-info/METADATA +358 -0
codemarp-0.1.0.dist-info/RECORD +28 -0
codemarp-0.1.0.dist-info/WHEEL +5 -0
codemarp-0.1.0.dist-info/entry_points.txt +2 -0
codemarp-0.1.0.dist-info/top_level.txt +1 -0

codemarp/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

codemarp/analyzers/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

codemarp/analyzers/high_level.py ADDED Viewed

@@ -0,0 +1,77 @@
+from codemarp.graph.models import Edge, ModuleNode
+from codemarp.parser.python_parser import ParsedPythonModule
+def build_high_level_edges(
+    parsed_modules: list[ParsedPythonModule],
+    modules: list[ModuleNode],
+) -> tuple:
+    """
+    Derive the high-level (grouped) import graph.
+    Each module id is collapsed with ``aggregate_module_id``. An "imports"
+    edge is emitted whenever a parsed module imports a name that resolves to
+    a known module belonging to a different group.
+    Returns ``(group_ids, edges)``: the sorted distinct group ids and the
+    de-duplicated edges in first-seen order.
+    """
+    group_of = {node.id: aggregate_module_id(node.id) for node in modules}
+    local_ids = set(group_of)
+    collected: list[Edge] = []
+    for parsed in parsed_modules:
+        importer = group_of.get(parsed.module_id, aggregate_module_id(parsed.module_id))
+        for import_name in parsed.imports:
+            resolved = _resolve_local_import(import_name, local_ids)
+            if not resolved:
+                # The import does not map onto any known module id.
+                continue
+            imported = group_of.get(resolved, aggregate_module_id(resolved))
+            if importer == imported:
+                # Imports inside one group are not shown at this level.
+                continue
+            collected.append(
+                Edge(source=importer, target=imported, kind="imports", label="imports")
+            )
+    return sorted(set(group_of.values())), _dedupe_edges(collected)
+def aggregate_module_id(module_id: str) -> str:
+    """
+    Map a dotted module id onto its high-level group id.
+    Ids with three or more dotted segments are cut down to their first two
+    (``codemarp.views.trace`` -> ``codemarp.views``); shorter ids such as
+    ``codemarp.errors`` or ``codemarp.cli`` are returned unchanged.
+    """
+    # maxsplit=2 is enough to tell "three or more segments" from fewer.
+    head = module_id.split(".", 2)
+    return module_id if len(head) < 3 else f"{head[0]}.{head[1]}"
+def _resolve_local_import(import_name: str, known_module_ids: set[str]) -> str | None:
+    """
+    Resolve an imported dotted name to one of the known module ids.
+    An exact match wins. Otherwise the known ids are scanned longest-first
+    and the first id that is either a dotted prefix of ``import_name`` (the
+    import names something inside that module) or lies underneath
+    ``import_name`` (the import names an enclosing package) is returned.
+    Returns ``None`` when nothing matches.
+    """
+    if import_name in known_module_ids:
+        return import_name
+    # Longest id first; ties are broken alphabetically so the result does not
+    # depend on set iteration order (which varies with string hash seeding).
+    for module_id in sorted(known_module_ids, key=lambda mid: (-len(mid), mid)):
+        if import_name.startswith(module_id + "."):
+            return module_id
+        if module_id.startswith(import_name + "."):
+            return module_id
+    return None
+def _dedupe_edges(edges: list[Edge]) -> list[Edge]:
+    """
+    Drop repeated edges, keeping the first occurrence of each
+    ``(source, target, kind)`` triple and preserving input order.
+    """
+    first_seen = {}
+    for candidate in edges:
+        # dicts keep insertion order, so setdefault retains the earliest edge.
+        first_seen.setdefault((candidate.source, candidate.target, candidate.kind), candidate)
+    return list(first_seen.values())

codemarp/analyzers/low_level.py ADDED Viewed

@@ -0,0 +1,272 @@
+import ast
+from dataclasses import dataclass
+from pathlib import Path
+from codemarp.graph.models import ControlFlowNode, Edge
+from codemarp.parser.python_parser import find_function_node
+@dataclass(slots=True)
+class LowLevelResult:
+    """Control-flow graph produced for a single focused function."""
+    # Id of the analysed function, as returned by find_function_node.
+    function_id: str
+    # Control-flow nodes created by ControlFlowBuilder for that function.
+    nodes: list[ControlFlowNode]
+    # "control_flow" edges connecting those nodes.
+    edges: list[Edge]
+def build_low_level_view(root: str | Path, focus: str) -> LowLevelResult:
+    """
+    Build the low-level (control-flow) view of one function.
+    ``focus`` is handed to ``find_function_node`` together with ``root`` to
+    locate the function; a fresh ``ControlFlowBuilder`` then turns its AST
+    into nodes and edges.
+    """
+    target_id, target_ast = find_function_node(Path(root), focus)
+    flow_nodes, flow_edges = ControlFlowBuilder().build_for_function(target_ast)
+    return LowLevelResult(function_id=target_id, nodes=flow_nodes, edges=flow_edges)
+class ControlFlowBuilder:
+    """
+    Builds a simplified control-flow graph for a single function.
+    Nodes are appended to ``self.nodes`` and edges to ``self.edges`` while the
+    statement list is walked; node ids are ``n1``, ``n2``, ... in creation
+    order. ``if`` statements and loops are expanded into sub-graphs, a ``try``
+    statement is represented by one "Try/Except" node, and ``return`` /
+    ``raise`` become "terminal" nodes from which control does not continue
+    into merges or loop back-edges.
+    """
+    def __init__(self) -> None:
+        self.nodes: list[ControlFlowNode] = []
+        self.edges: list[Edge] = []
+        self._counter = 0
+    def build_for_function(
+        self,
+        function_node: ast.FunctionDef | ast.AsyncFunctionDef,
+    ) -> tuple[list[ControlFlowNode], list[Edge]]:
+        """
+        Build the graph for ``function_node`` and return ``(nodes, edges)``.
+        The graph begins at a "Start" node; every exit left open after the
+        body has been walked is linked to a single "End" node.
+        """
+        start = self._new_node("Start", "start", lineno=function_node.lineno)
+        exits = self._walk_statements(function_node.body, [start])
+        end = self._new_node(
+            "End",
+            "end",
+            lineno=getattr(function_node, "end_lineno", function_node.lineno),
+        )
+        for exit_node in exits:
+            self._add_edge(exit_node, end)
+        return self.nodes, self.edges
+    def _walk_statements(
+        self,
+        statements: list[ast.stmt],
+        incoming: list[str],
+    ) -> list[str]:
+        """Chain ``statements`` after ``incoming`` and return the open exit node ids."""
+        exits = incoming
+        for statement in statements:
+            exits = self._handle_statement(statement, exits)
+        return exits
+    def _handle_statement(
+        self, statement: ast.stmt, incoming: list[str], edge_label: str | None = None
+    ) -> list[str]:
+        """
+        Add the node(s) for one statement and return its exit node ids.
+        ``edge_label`` (for example "True"/"False" on the first statement of a
+        branch) is attached to the first incoming edge only.
+        """
+        if isinstance(statement, ast.If):
+            return self._handle_if(statement, incoming, edge_label)
+        if isinstance(statement, (ast.For, ast.AsyncFor, ast.While)):
+            return self._handle_loop(statement, incoming, edge_label)
+        if isinstance(statement, ast.Try):
+            label, kind = "Try/Except", "statement"
+        elif isinstance(statement, ast.Return):
+            label, kind = "Return", "terminal"
+        elif isinstance(statement, ast.Raise):
+            label, kind = "Raise", "terminal"
+        else:
+            label, kind = self._statement_label(statement), "statement"
+        node = self._new_node(label, kind, lineno=statement.lineno)
+        self._connect(incoming, node, edge_label)
+        return [node]
+    def _connect(self, sources: list[str], target: str, edge_label: str | None) -> None:
+        """Link every source to ``target``; only the first edge carries ``edge_label``."""
+        for index, source in enumerate(sources):
+            self._add_edge(source, target, label=edge_label if index == 0 else None)
+    def _walk_branch_statements(
+        self,
+        statements: list[ast.stmt],
+        condition_node: str,
+        *,
+        branch_label: str,
+    ) -> list[str]:
+        """
+        Walk one branch of an ``if`` and return its exit node ids.
+        An empty branch returns ``[condition_node]`` so the caller can draw
+        the labelled edge from the condition straight to the merge node.
+        """
+        if not statements:
+            return [condition_node]
+        first_exits = self._handle_statement(
+            statements[0], [condition_node], edge_label=branch_label
+        )
+        if len(statements) == 1:
+            return first_exits
+        return self._walk_statements(statements[1:], first_exits)
+    def _handle_if(
+        self, statement: ast.If, incoming: list[str], edge_label: str | None = None
+    ) -> list[str]:
+        """
+        Expand an ``if`` into a decision node, its two branches and a merge.
+        Returns ``[merge]`` when at least one path continues past the ``if``
+        and ``[]`` when both branches end in a terminal node.
+        """
+        condition = self._new_node(
+            self._expr_label(statement.test),
+            "decision",
+            lineno=statement.lineno,
+        )
+        # Keep the enclosing branch label (e.g. "False" on an elif) on the
+        # edge that enters this decision.
+        self._connect(incoming, condition, edge_label)
+        then_exits = self._walk_branch_statements(
+            statement.body, condition, branch_label="True"
+        )
+        else_exits = self._walk_branch_statements(
+            statement.orelse, condition, branch_label="False"
+        )
+        then_empty = condition in then_exits
+        else_empty = condition in else_exits
+        non_terminals = [
+            node_id
+            for node_id in then_exits + else_exits
+            if node_id != condition and self._node_kind(node_id) != "terminal"
+        ]
+        if non_terminals or then_empty or else_empty:
+            merge = self._new_node("Merge", "merge", lineno=statement.lineno)
+            for source in non_terminals:
+                self._add_edge(source, merge)
+            if then_empty:
+                self._add_edge(condition, merge, label="True")
+            if else_empty:
+                self._add_edge(condition, merge, label="False")
+            return [merge]
+        return []
+    def _handle_loop(
+        self,
+        statement: ast.For | ast.AsyncFor | ast.While,
+        incoming: list[str],
+        edge_label: str | None = None,
+    ) -> list[str]:
+        """
+        Expand a loop into loop -> "Loop Body" -> ... -> loop plus an exit.
+        The loop node is labelled with the AST class name ("For", "AsyncFor"
+        or "While"). Returns the single "After Loop" node.
+        """
+        loop_label = type(statement).__name__
+        loop = self._new_node(loop_label, "loop", lineno=statement.lineno)
+        self._connect(incoming, loop, edge_label)
+        body_entry = self._new_node("Loop Body", "branch", lineno=statement.lineno)
+        self._add_edge(loop, body_entry, label="Iterate")
+        body_exits = (
+            self._walk_statements(statement.body, [body_entry])
+            if statement.body
+            else [body_entry]
+        )
+        for source in body_exits:
+            # A return/raise leaves the function; it never starts another
+            # iteration, so it gets no back-edge to the loop header.
+            if self._node_kind(source) != "terminal":
+                self._add_edge(source, loop, label="Next")
+        after_loop = self._new_node("After Loop", "merge", lineno=statement.lineno)
+        self._add_edge(loop, after_loop, label="Exit")
+        return [after_loop]
+    def _statement_label(self, statement: ast.stmt) -> str:
+        """
+        Return a short display label for a simple statement.
+        Assignments and expression statements whose value is a call are shown
+        as ``callee(...)``; other statements fall back to their unparsed
+        source or, if unparsing fails, to the AST class name.
+        """
+        if isinstance(statement, ast.Assign):
+            if isinstance(statement.value, ast.Call):  # simplify calls
+                return self._call_label(statement.value)
+            return "Assign"
+        if isinstance(statement, ast.AnnAssign):
+            if isinstance(statement.value, ast.Call):
+                return self._call_label(statement.value)
+            return "AnnAssign"
+        if isinstance(statement, ast.AugAssign):
+            return "AugAssign"
+        if isinstance(statement, ast.Expr):
+            if isinstance(statement.value, ast.Call):
+                return self._call_label(statement.value)
+            return self._expr_label(statement.value)
+        if isinstance(statement, ast.Pass):
+            return "Pass"
+        try:
+            return ast.unparse(statement)
+        except Exception:
+            # Best effort: labelling must never abort graph construction.
+            return type(statement).__name__
+    def _expr_label(self, expr: ast.AST) -> str:
+        """Return ``callee(...)`` for a call, otherwise the unparsed expression."""
+        if isinstance(expr, ast.Call):
+            return self._call_label(expr)
+        try:
+            return ast.unparse(expr)
+        except Exception:
+            return type(expr).__name__
+    def _call_label(self, call: ast.Call) -> str:
+        """Return the call's callee name followed by ``(...)``; arguments are elided."""
+        callee = self._callable_name(call.func)
+        return f"{callee}(...)"
+    def _callable_name(self, node: ast.AST) -> str:
+        """
+        Return a dotted name for the callee expression ``node``.
+        Attribute chains are flattened (``a.b.c``); when the chain does not
+        start at a plain name only the attribute parts are kept.
+        """
+        if isinstance(node, ast.Name):
+            return node.id
+        if isinstance(node, ast.Attribute):
+            parts: list[str] = []
+            current: ast.AST = node
+            # Collected innermost-last, hence the reversal below.
+            while isinstance(current, ast.Attribute):
+                parts.append(current.attr)
+                current = current.value
+            if isinstance(current, ast.Name):
+                parts.append(current.id)
+            return ".".join(reversed(parts))
+        try:
+            return ast.unparse(node)
+        except Exception:
+            return type(node).__name__
+    def _new_node(self, label: str, kind: str, *, lineno: int | None = None) -> str:
+        """Create a node, record it in ``self.nodes`` and return its generated id."""
+        self._counter += 1
+        node_id = f"n{self._counter}"
+        self.nodes.append(
+            ControlFlowNode(
+                id=node_id,
+                label=label,
+                kind=kind,
+                lineno=lineno,
+            )
+        )
+        return node_id
+    def _add_edge(self, source: str, target: str, *, label: str | None = None) -> None:
+        """Record a "control_flow" edge from ``source`` to ``target``."""
+        self.edges.append(
+            Edge(
+                source=source,
+                target=target,
+                kind="control_flow",
+                label=label,
+            )
+        )
+    def _node_kind(self, node_id: str) -> str:
+        """Return the kind of the node with id ``node_id``; raise ``KeyError`` if unknown."""
+        for node in self.nodes:
+            if node.id == node_id:
+                return node.kind
+        raise KeyError(f"Unknown control-flow node id: {node_id}")

codemarp/analyzers/mid_level.py ADDED Viewed

@@ -0,0 +1,162 @@
+from codemarp.graph.models import Edge, FunctionNode
+from codemarp.parser.python_parser import ParsedPythonModule
+def build_mid_level_edges(
+    parsed_modules: list[ParsedPythonModule], functions: list[FunctionNode]
+) -> list[Edge]:
+    """
+    Build the function-to-function "calls" edges.
+    Every ``(caller_id, callee_name)`` pair recorded on the parsed modules is
+    resolved with ``_resolve_callee``; pairs that resolve to a known function
+    become an edge. Unresolved calls are skipped and duplicate edges removed.
+    """
+    edges: list[Edge] = []
+    by_id = {fn.id: fn for fn in functions}
+    by_name: dict[str, list[FunctionNode]] = {}
+    by_module_and_name: dict[tuple[str, str], FunctionNode] = {}
+    for fn in functions:
+        short_name = fn.name.split(".")[-1]
+        by_name.setdefault(fn.name, []).append(fn)
+        by_name.setdefault(short_name, []).append(fn)
+        # The exact name always wins for (module, name) lookups ...
+        by_module_and_name[(fn.module_id, fn.name)] = fn
+        if short_name != fn.name:
+            # ... while the short alias of a dotted name only fills a gap, so
+            # it can never shadow a function whose real name is that alias.
+            by_module_and_name.setdefault((fn.module_id, short_name), fn)
+    parsed_by_module = {parsed.module_id: parsed for parsed in parsed_modules}
+    for module in parsed_modules:
+        for caller_id, callee_name in module.calls:
+            target = _resolve_callee(
+                caller_module_id=module.module_id,
+                callee_name=callee_name,
+                parsed_by_module=parsed_by_module,
+                by_module_and_name=by_module_and_name,
+                by_name=by_name,
+                by_id=by_id,
+            )
+            if target:
+                edges.append(
+                    Edge(
+                        source=caller_id, target=target.id, kind="calls", label="calls"
+                    )
+                )
+    return _dedupe_edges(edges)
+def _resolve_callee(
+    caller_module_id: str,
+    callee_name: str,
+    parsed_by_module: dict,
+    by_module_and_name: dict,
+    by_name: dict,
+    by_id: dict,
+) -> FunctionNode | None:
+    """
+    Resolve a call name seen in ``caller_module_id`` to a known function.
+    Strategies are tried in order and the first hit wins: same module,
+    imported symbol, imported module member, then a globally unique name.
+    Returns ``None`` when none of them matches.
+    """
+    caller = parsed_by_module[caller_module_id]
+    # `or` short-circuits, so later strategies only run when earlier ones miss.
+    return (
+        _resolve_same_module_call(
+            caller_module_id=caller_module_id,
+            callee_name=callee_name,
+            by_module_and_name=by_module_and_name,
+        )
+        or _resolve_imported_symbol_call(
+            parsed_module=caller,
+            callee_name=callee_name,
+            by_id=by_id,
+            by_module_and_name=by_module_and_name,
+        )
+        or _resolve_imported_module_call(
+            parsed_module=caller,
+            callee_name=callee_name,
+            by_module_and_name=by_module_and_name,
+        )
+        or _resolve_unique_global_call(callee_name=callee_name, by_name=by_name)
+    )
+def _resolve_same_module_call(
+    *,
+    caller_module_id: str,
+    callee_name: str,
+    by_module_and_name: dict[tuple[str, str], FunctionNode],
+) -> FunctionNode | None:
+    """Look the callee up among the functions indexed under the caller's own module."""
+    lookup_key = (caller_module_id, callee_name)
+    return by_module_and_name.get(lookup_key)
+def _resolve_imported_symbol_call(
+    parsed_module: ParsedPythonModule,
+    callee_name: str,
+    by_id: dict,
+    by_module_and_name: dict,
+) -> FunctionNode | None:
+    """
+    Resolve an undotted call through the module's imported symbols.
+    A symbol matches when its alias (or, without alias, its name) equals
+    ``callee_name``; the target is then looked up first by the id
+    ``"<module>:<name>"`` and then by ``(module, name)``. Dotted call names
+    are never handled here.
+    """
+    if callee_name.count(".") > 0:
+        return None
+    for symbol in parsed_module.imported_symbols:
+        if (symbol.alias or symbol.name) == callee_name:
+            function_id = f"{symbol.module}:{symbol.name}"
+            if function_id in by_id:
+                return by_id[function_id]
+            fallback = by_module_and_name.get((symbol.module, symbol.name))
+            if fallback:
+                return fallback
+            # No hit for this import; a later import may bind the same name.
+    return None
+def _resolve_imported_module_call(
+    parsed_module: ParsedPythonModule,
+    callee_name: str,
+    by_module_and_name: dict,
+) -> FunctionNode | None:
+    """
+    Resolve a dotted call such as ``mod.func`` through imported modules.
+    The text before the first dot is compared with each imported module's
+    visible name (its alias, else the last segment of its dotted path); the
+    remainder is looked up inside the first module that matches.
+    """
+    prefix, dot, member = callee_name.partition(".")
+    if not dot:
+        return None
+    for entry in parsed_module.imported_modules:
+        if (entry.alias or entry.module.split(".")[-1]) == prefix:
+            # The first module with a matching visible name decides the
+            # outcome, even when the member is unknown.
+            return by_module_and_name.get((entry.module, member))
+    return None
+def _resolve_unique_global_call(
+    *,
+    callee_name: str,
+    by_name: dict[str, list[FunctionNode]],
+) -> FunctionNode | None:
+    """
+    Last-resort lookup: accept an undotted name only when exactly one distinct
+    function (compared by id) is registered under it anywhere in the project.
+    """
+    if _is_dotted_call(callee_name):
+        return None
+    distinct = {}
+    for fn in by_name.get(callee_name, []):
+        # The same function may be listed more than once; collapse by id.
+        distinct[fn.id] = fn
+    if len(distinct) != 1:
+        return None
+    (only_match,) = distinct.values()
+    return only_match
+def _is_dotted_call(callee_name: str) -> bool:
+    """Tell whether the call name contains at least one dot (``obj.method``)."""
+    return callee_name.find(".") != -1
+def _dedupe_edges(edges: list[Edge]) -> list[Edge]:
+    """
+    Return ``edges`` without repeats: only the first edge for each
+    ``(source, target, kind)`` triple is kept, in the original order.
+    """
+    emitted: set[tuple[str, str, str]] = set()
+    unique: list[Edge] = []
+    for candidate in edges:
+        signature = (candidate.source, candidate.target, candidate.kind)
+        if signature in emitted:
+            continue
+        emitted.add(signature)
+        unique.append(candidate)
+    return unique

codemarp/cli/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+

codemarp/cli/main.py ADDED Viewed

@@ -0,0 +1,159 @@
+import argparse
+from codemarp.analyzers.low_level import build_low_level_view
+from codemarp.errors import codemarpError
+from codemarp.pipeline.apply_view import ViewType, apply_view
+from codemarp.pipeline.build_bundle import build_bundle
+from codemarp.pipeline.export_all import export_all, export_low_level
+def analyze_command(
+    root: str,
+    out: str,
+    *,
+    view: ViewType,
+    focus: str | None = None,
+    module: str | None = None,
+    max_depth: int | None = None,
+) -> None:
+    """
+    Analyse the code base under ``root``, export the requested view to
+    ``out`` and print a short summary.
+    For ``ViewType.LOW`` the control-flow view of ``focus`` is built and
+    exported with ``export_low_level``; every other view is produced by
+    ``apply_view`` and exported with ``export_all``.
+    Raises ``codemarpError`` when the low-level view is requested without a
+    ``focus``.
+    """
+    if view is ViewType.LOW and focus is None:
+        # Explicit check rather than `assert`: assertions are stripped under
+        # `python -O`, and failing here avoids building the bundle for nothing.
+        raise codemarpError("--focus is required with --view low")
+    build_result = build_bundle(root)
+    if view is ViewType.LOW:
+        low_view = build_low_level_view(root, focus)
+        export_low_level(build_result=build_result, low_view=low_view, out_dir=out)
+        print(f"Parsed {len(build_result.parsed_modules)} modules")
+        print(f"Discovered {len(build_result.bundle.functions)} functions")
+        print(f"View type: {view.value}")
+        print(f"Low-level view for {focus}")
+        print(f"Low-level view contains {len(low_view.nodes)} nodes")
+        print("Wrote graph.json")
+        print("Wrote high_level.mmd")
+        print("Wrote low_level.mmd")
+        print("Wrote low_level.json")
+        return
+    graph_view = apply_view(
+        build_result.bundle,
+        view=view,
+        focus=focus,
+        module=module,
+        max_depth=max_depth,
+    )
+    export_all(build_result=build_result, view=graph_view, out_dir=out)
+    print(f"Parsed {len(build_result.parsed_modules)} modules")
+    print(f"Discovered {len(build_result.bundle.functions)} functions")
+    print(f"View type: {view.value}")
+    if view is ViewType.TRACE:
+        print(f"Focused trace from {focus}")
+        print(f"Trace contains {len(graph_view.functions)} functions")
+    if view is ViewType.MODULE:
+        print(f"Module view for {module}")
+        print(f"Module view contains {len(graph_view.functions)} functions")
+    if view is ViewType.REVERSE:
+        print(f"Reverse trace from {focus}")
+        print(f"Reverse trace contains {len(graph_view.functions)} functions")
+    print("Wrote graph.json")
+    print("Wrote high_level.mmd")
+    print("Wrote mid_level.mmd")
+    print("Wrote mid_level.json")
+def build_parser() -> argparse.ArgumentParser:
+    """
+    Create the ``codemarp`` command-line parser.
+    The parser has one required sub-command, ``analyze``, which takes the
+    repository root plus ``--out``, ``--view``, ``--focus``, ``--module`` and
+    ``--max-depth``. Which options may be combined is checked separately by
+    ``_validate_analyze_args``.
+    """
+    parser = argparse.ArgumentParser(prog="codemarp", description="3-level code mapper")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    analyze = subparsers.add_parser("analyze", help="Analyze a Python codebase")
+    analyze.add_argument("root", help="Path to the repository root")
+    analyze.add_argument("--out", default="./codemarp_out", help="Output directory")
+    analyze.add_argument(
+        "--view",
+        choices=[view.value for view in ViewType],
+        default=ViewType.FULL.value,
+        help="Graph view to export",
+    )
+    analyze.add_argument(
+        "--focus",
+        default=None,
+        # --focus is also mandatory for the low-level view, so say so here.
+        help="Entrypoint for TRACE/REVERSE/LOW view (function id: module:function)",
+    )
+    analyze.add_argument(
+        "--module",
+        default=None,
+        help="Module id for MODULE view",
+    )
+    analyze.add_argument(
+        "--max-depth",
+        type=int,
+        default=None,
+        help="Maximum trace depth from the focused function",
+    )
+    return parser
+def _validate_analyze_args(
+    args: argparse.Namespace, parser: argparse.ArgumentParser
+) -> None:
+    """
+    Reject option combinations that make no sense for the selected view.
+    Each view either requires or forbids ``--focus``, ``--module`` and
+    ``--max-depth``; every violation is reported through ``parser.error``.
+    """
+    def require(flag: str, value: object, view_name: str) -> None:
+        # Missing and empty values are both treated as "not given".
+        if not value:
+            parser.error(f"{flag} is required with --view {view_name}")
+    def forbid(flag: str, value: object, view_name: str) -> None:
+        if value is not None:
+            parser.error(f"{flag} cannot be used with --view {view_name}")
+    selected = ViewType(args.view)
+    if selected is ViewType.FULL:
+        forbid("--focus", args.focus, "full")
+        forbid("--module", args.module, "full")
+        forbid("--max-depth", args.max_depth, "full")
+    if selected is ViewType.TRACE:
+        require("--focus", args.focus, "trace")
+        forbid("--module", args.module, "trace")
+    if selected is ViewType.MODULE:
+        require("--module", args.module, "module")
+        forbid("--focus", args.focus, "module")
+        forbid("--max-depth", args.max_depth, "module")
+    if selected is ViewType.REVERSE:
+        require("--focus", args.focus, "reverse")
+        forbid("--module", args.module, "reverse")
+    if selected is ViewType.LOW:
+        require("--focus", args.focus, "low")
+        forbid("--module", args.module, "low")
+        forbid("--max-depth", args.max_depth, "low")
+def main() -> None:
+    """
+    Command-line entry point.
+    Parses ``sys.argv``, validates the ``analyze`` options and runs the
+    analysis. A ``codemarpError`` is turned into ``SystemExit`` carrying the
+    error text.
+    """
+    cli = build_parser()
+    options = cli.parse_args()
+    if options.command != "analyze":
+        return
+    _validate_analyze_args(options, cli)
+    try:
+        analyze_command(
+            options.root,
+            options.out,
+            view=ViewType(options.view),
+            focus=options.focus,
+            module=options.module,
+            max_depth=options.max_depth,
+        )
+    except codemarpError as failure:
+        raise SystemExit(str(failure)) from failure
+# Run the CLI when this module is executed directly as a script.
+if __name__ == "__main__":
+    main()

codemarp/errors.py ADDED Viewed

@@ -0,0 +1,22 @@
+class codemarpError(Exception):
+    """Base class of every exception defined in this module."""
+    pass
+class ParseError(codemarpError):
+    """Parsing failure (presumably raised by the source parsers; confirm at the raise sites)."""
+    pass
+class ResolutionError(codemarpError):
+    """Common base of TraceError and ModuleViewError."""
+    pass
+class TraceError(ResolutionError):
+    """Resolution failure specific to trace views (presumed from the name; confirm at the raise sites)."""
+    pass
+class ModuleViewError(ResolutionError):
+    """Resolution failure specific to the module view (presumed from the name; confirm at the raise sites)."""
+    pass
+class FocusFormatError(codemarpError):
+    """Malformed focus string (presumably not in ``module:function`` form; confirm at the raise sites)."""
+    pass

codemarp/exporters/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+