PyPI - codemap-python - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

codemap-python 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

analysis/call_graph/call_extractor.py +15 -16
analysis/core/ast_parser.py +7 -13
analysis/core/import_extractor.py +46 -46
analysis/explain/explain_runner.py +50 -47
analysis/explain/summary_generator.py +8 -5
analysis/runners/phase4_runner.py +41 -44
analysis/utils/bom_handler.py +78 -14
analysis/utils/cache_manager.py +26 -21
analysis/utils/progress_spinner.py +85 -0
analysis/utils/repo_walk.py +27 -0
cli.py → codemap_app.py +203 -132
codemap_cli.py +1 -1
{codemap_python-0.1.3.dist-info → codemap_python-0.1.5.dist-info}/METADATA +24 -12
{codemap_python-0.1.3.dist-info → codemap_python-0.1.5.dist-info}/RECORD +19 -17
{codemap_python-0.1.3.dist-info → codemap_python-0.1.5.dist-info}/top_level.txt +1 -1
ui/app.py +10 -6
ui/static/app.js +69 -10
{codemap_python-0.1.3.dist-info → codemap_python-0.1.5.dist-info}/WHEEL +0 -0
{codemap_python-0.1.3.dist-info → codemap_python-0.1.5.dist-info}/entry_points.txt +0 -0

analysis/call_graph/call_extractor.py CHANGED Viewed

@@ -1,7 +1,7 @@
-# AST Call detection
-import ast
-from analysis.utils.bom_handler import remove_bom
+# AST Call detection
+import ast
+from analysis.utils.bom_handler import read_source_file, parse_source_to_ast
 class FunctionCallVisitor(ast.NodeVisitor):
     def __init__(self, file_path):
@@ -82,15 +82,14 @@ class FunctionCallVisitor(ast.NodeVisitor):
         return None
-def extract_function_calls(file_path):
-    with open(file_path, "r", encoding="utf-8") as f:
-        source = f.read()
-    # Remove BOM if present
-    source = remove_bom(source)
-    tree = ast.parse(source)
-    visitor = FunctionCallVisitor(file_path)
-    visitor.visit(tree)
-    return visitor.calls
+def extract_function_calls(file_path):
+    source = read_source_file(file_path)
+    tree = parse_source_to_ast(source, file_path=file_path)
+    return extract_function_calls_from_tree(tree, file_path)
+def extract_function_calls_from_tree(tree, file_path):
+    visitor = FunctionCallVisitor(file_path)
+    visitor.visit(tree)
+    return visitor.calls

analysis/core/ast_parser.py CHANGED Viewed

@@ -1,13 +1,12 @@
-# AST Parser Module
-import ast
-from analysis.utils.bom_handler import remove_bom
+# AST Parser Module
+from analysis.utils.bom_handler import read_and_parse_python_file
-def parse_python_file(file_path):
-    """Parse a Python file, automatically handling UTF-8 BOM.
+def parse_python_file(file_path):
+    """Parse a Python file with automatic encoding and BOM handling.
     This function:
-    1. Reads the file with UTF-8 encoding
+    1. Reads the file with automatic encoding detection (UTF-8 → Latin-1)
     2. Removes any BOM characters automatically
     3. Parses the cleaned source code
@@ -20,12 +19,7 @@ def parse_python_file(file_path):
     Raises:
         SyntaxError: If source code has syntax errors
         FileNotFoundError: If file doesn't exist
+        ValueError: If file encoding cannot be determined
     """
-    with open(file_path, "r", encoding="utf-8") as f:
-        source = f.read()
-    # Remove BOM if present (handles files from Windows editors, etc.)
-    source = remove_bom(source)
-    return ast.parse(source)
+    return read_and_parse_python_file(file_path)

analysis/core/import_extractor.py CHANGED Viewed

@@ -1,49 +1,49 @@
 # Import Extractor Module
 # analysis/import_extractor.py
-import ast
-from analysis.utils.bom_handler import remove_bom
-def extract_imports(file_path):
-    """Extract imports from a Python file, handling UTF-8 BOM automatically."""
-    with open(file_path, "r", encoding="utf-8") as f:
-        source = f.read()
-    # Remove BOM if present
-    source = remove_bom(source)
-    tree = ast.parse(source)
-    imports = []
-    for node in ast.walk(tree):
-        # import module
-        if isinstance(node, ast.Import):
-            for alias in node.names:
-                imports.append({
-                    "type": "import",
-                    "module": alias.name,
-                    "name": None,
-                    "alias": alias.asname,
-                    "line": node.lineno,
-                    "file": file_path
-                })
-        # from module import name
-        elif isinstance(node, ast.ImportFrom):
-            module = node.module
-            level = node.level  # 0 = absolute, >0 = relative
-            for alias in node.names:
-                imports.append({
-                    "type": "from_import",
-                    "module": module,
-                    "name": alias.name,
-                    "alias": alias.asname,
-                    "level": level,
-                    "line": node.lineno,
-                    "file": file_path
-                })
-    return imports
+import ast
+from analysis.utils.bom_handler import read_source_file, parse_source_to_ast
+def extract_imports_from_tree(tree, file_path):
+    """Extract imports from an already-parsed AST tree."""
+    imports = []
+    for node in ast.walk(tree):
+        # import module
+        if isinstance(node, ast.Import):
+            for alias in node.names:
+                imports.append({
+                    "type": "import",
+                    "module": alias.name,
+                    "name": None,
+                    "alias": alias.asname,
+                    "line": node.lineno,
+                    "file": file_path
+                })
+        # from module import name
+        elif isinstance(node, ast.ImportFrom):
+            module = node.module
+            level = node.level  # 0 = absolute, >0 = relative
+            for alias in node.names:
+                imports.append({
+                    "type": "from_import",
+                    "module": module,
+                    "name": alias.name,
+                    "alias": alias.asname,
+                    "level": level,
+                    "line": node.lineno,
+                    "file": file_path
+                })
+    return imports
+def extract_imports(file_path):
+    """Extract imports from a Python file with automatic encoding and BOM handling."""
+    source = read_source_file(file_path)
+    tree = parse_source_to_ast(source, file_path=file_path)
+    return extract_imports_from_tree(tree, file_path)

analysis/explain/explain_runner.py CHANGED Viewed

@@ -5,35 +5,32 @@ from __future__ import annotations
 from typing import Optional, Dict, Any
-import ast
-import json
-import os
-from analysis.utils.bom_handler import remove_bom
-from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
-from analysis.graph.callgraph_index import CallGraphIndex, CallSite
-from analysis.explain.docstring_extractor import extract_docstrings
-from analysis.explain.signature_extractor import extract_signatures
-from analysis.explain.return_analyzer import analyze_returns
-from analysis.explain.summary_generator import generate_symbol_summary
-def collect_python_files(root_dir: str):
-    py_files = []
-    for root, _, files in os.walk(root_dir):
-        for file in files:
-            if file.endswith(".py") and not file.startswith("__"):
-                py_files.append(os.path.join(root, file))
-    return py_files
-def parse_ast(file_path: str) -> ast.AST:
-    """Parse a Python file, automatically handling UTF-8 BOM."""
-    with open(file_path, "r", encoding="utf-8") as f:
-        source = f.read()
-    # Remove BOM if present
-    source = remove_bom(source)
-    return ast.parse(source)
+import json
+import os
+from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
+from analysis.graph.callgraph_index import CallGraphIndex, CallSite
+from analysis.explain.docstring_extractor import extract_docstrings
+from analysis.explain.signature_extractor import extract_signatures
+from analysis.explain.return_analyzer import analyze_returns
+from analysis.explain.summary_generator import generate_symbol_summary
+from analysis.utils.repo_walk import filter_skipped_dirs
+def collect_python_files(root_dir: str):
+    py_files = []
+    for root, dirs, files in os.walk(root_dir):
+        dirs[:] = filter_skipped_dirs(dirs)
+        for file in files:
+            if file.endswith(".py") and not file.startswith("__"):
+                py_files.append(os.path.join(root, file))
+    return py_files
+def parse_ast(file_path: str):
+    """Parse a Python file with automatic encoding and BOM handling."""
+    from analysis.utils.bom_handler import read_and_parse_python_file
+    return read_and_parse_python_file(file_path)
 def file_to_module(file_path: str, repo_root: str) -> str:
@@ -83,7 +80,11 @@ def merge_maps(dst: dict, src: dict):
         dst[k].update(src.get(k, {}))
-def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dict[str, Any]:
+def run(
+    repo_dir: Optional[str] = None,
+    output_dir: Optional[str] = None,
+    symbol_snapshot: Optional[list] = None,
+) -> Dict[str, Any]:
     """
     Callable explain pipeline (Phase-5/6), suitable for CLI/VS Code.
@@ -119,23 +120,25 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dic
     # 2) Collect repo python files
     python_files = collect_python_files(repo_dir)
-    # 3) Build symbol index + extractors across repo
-    symbol_index = SymbolIndex()
-    repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
-    repo_signatures = {"functions": {}, "methods": {}}
-    repo_returns = {"functions": {}, "methods": {}}
-    for file_path in python_files:
-        tree = parse_ast(file_path)
-        module_path = file_to_module(file_path, repo_dir)
-        # index symbols
-        symbol_index.index_file(tree, module_path, file_path)
-        # extract per-file and merge
-        merge_maps(repo_docstrings, extract_docstrings(tree))
+    # 3) Build symbol index + extractors across repo
+    symbol_index = SymbolIndex()
+    loaded_snapshot = False
+    if isinstance(symbol_snapshot, list) and symbol_snapshot:
+        symbol_index.load_snapshot(symbol_snapshot)
+        loaded_snapshot = True
+    repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
+    repo_signatures = {"functions": {}, "methods": {}}
+    repo_returns = {"functions": {}, "methods": {}}
+    for file_path in python_files:
+        tree = parse_ast(file_path)
+        if not loaded_snapshot:
+            module_path = file_to_module(file_path, repo_dir)
+            symbol_index.index_file(tree, module_path, file_path)
+        # extract per-file and merge
+        merge_maps(repo_docstrings, extract_docstrings(tree))
         sigs = extract_signatures(tree)
         repo_signatures["functions"].update(sigs.get("functions", {}))

analysis/explain/summary_generator.py CHANGED Viewed

@@ -11,11 +11,14 @@ from analysis.indexing.symbol_index import SymbolInfo
 from analysis.graph.callgraph_index import CallGraphIndex
-def _first_line(text: Optional[str]) -> Optional[str]:
-    if not text:
-        return None
-    line = text.strip().splitlines()[0].strip()
-    return line or None
+def _first_line(text: Optional[str]) -> Optional[str]:
+    if not text:
+        return None
+    stripped = text.strip()
+    if not stripped:
+        return None
+    line = stripped.splitlines()[0].strip()
+    return line or None
 def _humanize_name(name: str) -> str:

analysis/runners/phase4_runner.py CHANGED Viewed

@@ -3,39 +3,34 @@ from __future__ import annotations
 from typing import Optional, Dict, Any, List
-import os
-import ast
-import json
-from analysis.indexing.symbol_index import SymbolIndex
-from analysis.indexing.import_resolver import ImportResolver
-from analysis.call_graph.cross_file_resolver import CrossFileResolver
-from analysis.call_graph.call_extractor import extract_function_calls
-from analysis.core.import_extractor import extract_imports
-from analysis.graph.callgraph_index import build_caller_fqn
-from analysis.utils.bom_handler import remove_bom
+import os
+import json
+from analysis.indexing.symbol_index import SymbolIndex
+from analysis.indexing.import_resolver import ImportResolver
+from analysis.call_graph.cross_file_resolver import CrossFileResolver
+from analysis.call_graph.call_extractor import extract_function_calls_from_tree
+from analysis.core.import_extractor import extract_imports_from_tree
+from analysis.graph.callgraph_index import build_caller_fqn
+from analysis.utils.repo_walk import filter_skipped_dirs
 PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
-def collect_python_files(root_dir: str) -> List[str]:
-    ignore_dirs = {".git", "__pycache__", ".codemap_cache", "node_modules", ".venv", "venv"}
-    py_files: List[str] = []
-    for root, dirs, files in os.walk(root_dir):
-        dirs[:] = [d for d in dirs if d not in ignore_dirs]
-        for file in files:
-            if file.endswith(".py") and not file.startswith("__"):
-                py_files.append(os.path.join(root, file))
+def collect_python_files(root_dir: str) -> List[str]:
+    py_files: List[str] = []
+    for root, dirs, files in os.walk(root_dir):
+        dirs[:] = filter_skipped_dirs(dirs)
+        for file in files:
+            if file.endswith(".py") and not file.startswith("__"):
+                py_files.append(os.path.join(root, file))
     return py_files
-def parse_ast(file_path: str):
-    """Parse a Python file, automatically handling UTF-8 BOM."""
-    with open(file_path, "r", encoding="utf-8") as f:
-        source = f.read()
-    # Remove BOM if present
-    source = remove_bom(source)
-    return ast.parse(source)
+def parse_ast(file_path: str):
+    """Parse a Python file, automatically handling encoding and UTF-8 BOM."""
+    from analysis.utils.bom_handler import read_and_parse_python_file
+    return read_and_parse_python_file(file_path)
 def file_to_module(file_path: str, repo_root: str) -> str:
@@ -80,25 +75,27 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None, force_
     os.makedirs(output_dir, exist_ok=True)
-    python_files = collect_python_files(repo_dir)
-    symbol_index = SymbolIndex()
-    file_module_map: Dict[str, str] = {}
-    for file_path in python_files:
-        module_path = file_to_module(file_path, repo_dir)
-        file_module_map[file_path] = module_path
-        tree = parse_ast(file_path)
-        symbol_index.index_file(tree, module_path, file_path)
-    import_resolver = ImportResolver(symbol_index)
-    for file_path in python_files:
-        module_path = file_module_map[file_path]
-        imports = extract_imports(file_path)
-        import_resolver.index_module_imports(module_path, imports)
-    all_calls = []
-    for file_path in python_files:
-        all_calls.extend(extract_function_calls(file_path))
+    python_files = collect_python_files(repo_dir)
+    symbol_index = SymbolIndex()
+    file_module_map: Dict[str, str] = {}
+    parsed_trees: Dict[str, Any] = {}
+    for file_path in python_files:
+        module_path = file_to_module(file_path, repo_dir)
+        file_module_map[file_path] = module_path
+        tree = parse_ast(file_path)
+        parsed_trees[file_path] = tree
+        symbol_index.index_file(tree, module_path, file_path)
+    import_resolver = ImportResolver(symbol_index)
+    for file_path in python_files:
+        module_path = file_module_map[file_path]
+        imports = extract_imports_from_tree(parsed_trees[file_path], file_path)
+        import_resolver.index_module_imports(module_path, imports)
+    all_calls = []
+    for file_path in python_files:
+        all_calls.extend(extract_function_calls_from_tree(parsed_trees[file_path], file_path))
     cross_resolver = CrossFileResolver(symbol_index, import_resolver)
     resolved_calls = []

analysis/utils/bom_handler.py CHANGED Viewed

@@ -1,13 +1,19 @@
-"""BOM (Byte Order Mark) handling utilities for CodeMap.
+"""BOM (Byte Order Mark), encoding, and AST parsing utilities for CodeMap.
-This module provides utilities to handle UTF-8 BOM characters that are
-sometimes added to Python files by certain editors (especially on Windows).
+This module provides utilities to handle:
+1. UTF-8 BOM (Byte Order Mark) characters added by certain editors
+2. Non-UTF-8 encoded files (e.g., Latin-1, Windows-1252)
-BOM (U+FEFF) is an invisible character that Python's AST parser cannot handle,
-causing: "invalid non-printable character U+FEFF"
+Issues handled:
+- BOM (U+FEFF): invisible character causing "invalid non-printable character U+FEFF"
+- Non-UTF-8: files with different encodings causing UnicodeDecodeError
-Solution: Strip BOM before parsing Python files.
-"""
+Solution: Detect encoding with fallback chain, strip BOM, and parse quietly.
+"""
+import ast
+import warnings
+from typing import Tuple
 def remove_bom(source: str) -> str:
@@ -35,10 +41,49 @@ def remove_bom(source: str) -> str:
     return source
-def read_source_file(file_path: str) -> str:
-    """Read a Python file and remove BOM if present.
+def detect_encoding(file_path: str) -> Tuple[str, bool]:
+    """Detect file encoding by trying multiple decodings.
-    This is a convenience function that combines file reading with BOM removal.
+    Tries encodings in this order:
+    1. UTF-8 (most common for Python files)
+    2. System default encoding
+    3. Latin-1 / ISO-8859-1 (accepts any byte sequence)
+    Args:
+        file_path: Path to file to detect encoding for
+    Returns:
+        Tuple of (encoding_name: str, is_fallback: bool)
+        is_fallback=True means file uses non-standard encoding
+    Raises:
+        FileNotFoundError: If file doesn't exist
+    """
+    import sys
+    encodings_to_try = [
+        ('utf-8', False),
+        (sys.getdefaultencoding(), False),
+        ('latin-1', True),  # Latin-1 accepts any byte sequence
+    ]
+    for encoding, is_fallback in encodings_to_try:
+        try:
+            with open(file_path, 'rb') as f:
+                f.read().decode(encoding)
+            return (encoding, is_fallback)
+        except (UnicodeDecodeError, LookupError):
+            continue
+    # Should never reach here since Latin-1 accepts all bytes
+    return ('latin-1', True)
+def read_source_file(file_path: str) -> str:
+    """Read a Python file with automatic encoding detection and BOM removal.
+    Handles files with different encodings gracefully by trying multiple
+    decodings in order of likelihood, then falling back to Latin-1.
     Args:
         file_path: Path to Python file to read
@@ -48,8 +93,27 @@ def read_source_file(file_path: str) -> str:
     Raises:
         FileNotFoundError: If file doesn't exist
-        UnicodeDecodeError: If file encoding is not UTF-8
     """
-    with open(file_path, "r", encoding="utf-8") as f:
-        source = f.read()
-    return remove_bom(source)
+    encoding, _is_fallback = detect_encoding(file_path)
+    with open(file_path, 'r', encoding=encoding, errors='replace') as f:
+        source = f.read()
+    return remove_bom(source)
+def parse_source_to_ast(source: str, file_path: str = "<unknown>") -> ast.AST:
+    """Parse source code while suppressing noisy invalid-escape warnings.
+    Some user repositories contain regular string literals like ``"\\S"`` or
+    ``"\\["``. Python can emit ``SyntaxWarning: invalid escape sequence`` while
+    parsing those files even though analysis can continue normally. For CodeMap,
+    these warnings are implementation noise, so we suppress them here.
+    """
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", category=SyntaxWarning)
+        return ast.parse(source, filename=file_path)
+def read_and_parse_python_file(file_path: str) -> ast.AST:
+    """Read a Python file with encoding/BOM handling and return its AST."""
+    source = read_source_file(file_path)
+    return parse_source_to_ast(source, file_path=file_path)

analysis/utils/cache_manager.py CHANGED Viewed

@@ -5,15 +5,15 @@ import json
 import os
 import shutil
 import tempfile
-from datetime import datetime, timezone
-from threading import RLock
-from typing import Any, Dict, List, Optional, Tuple
-from security_utils import redact_secrets
-_LOCK = RLock()
-_SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
-_SKIP_DIRS = {".git", "__pycache__", ".codemap_cache", ".venv", "venv", "node_modules"}
+from datetime import datetime, timezone
+from threading import RLock
+from typing import Any, Dict, List, Optional, Tuple
+from analysis.utils.repo_walk import filter_skipped_dirs
+from security_utils import redact_secrets
+_LOCK = RLock()
+_SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
 def _project_root() -> str:
@@ -195,14 +195,14 @@ def save_policy(policy: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[
 def collect_fingerprints(repo_dir: str) -> Dict[str, Dict[str, int]]:
     repo_root = os.path.abspath(repo_dir)
-    out: Dict[str, Dict[str, int]] = {}
-    if not os.path.isdir(repo_root):
-        return out
-    for root, dirs, files in os.walk(repo_root):
-        dirs[:] = [d for d in dirs if d not in _SKIP_DIRS]
-        for name in files:
-            if not name.endswith(".py"):
-                continue
+    out: Dict[str, Dict[str, int]] = {}
+    if not os.path.isdir(repo_root):
+        return out
+    for root, dirs, files in os.walk(repo_root):
+        dirs[:] = filter_skipped_dirs(dirs)
+        for name in files:
+            if not name.endswith(".py"):
+                continue
             fp = os.path.join(root, name)
             try:
                 st = os.stat(fp)
@@ -250,16 +250,21 @@ def save_manifest(repo_dir: str, manifest: Dict[str, Any], base_dir: Optional[st
         _atomic_json_write(_manifest_path(repo_dir, base_dir), _scrub_payload(payload))
-def should_rebuild(repo_dir: str, analysis_version: str = "2.2", base_dir: Optional[str] = None) -> bool:
+def should_rebuild(
+    repo_dir: str,
+    analysis_version: str = "2.2",
+    base_dir: Optional[str] = None,
+    current_fingerprints: Optional[Dict[str, Any]] = None,
+) -> bool:
     manifest = load_manifest(repo_dir, base_dir=base_dir)
     if not manifest:
         return True
     if str(manifest.get("analysis_version", "") or "") != str(analysis_version or ""):
         return True
     previous = manifest.get("fingerprints", {}) if isinstance(manifest.get("fingerprints"), dict) else {}
-    current = collect_fingerprints(repo_dir)
-    delta = diff_fingerprints(previous, current)
-    return bool(delta.get("changed_count", 0))
+    current = current_fingerprints if isinstance(current_fingerprints, dict) else collect_fingerprints(repo_dir)
+    delta = diff_fingerprints(previous, current)
+    return bool(delta.get("changed_count", 0))
 def _default_metadata(repo_hash: str) -> Dict[str, Any]:

codemap-python 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

codemap-python 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl