codemap-python 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
 
3
3
  from collections import defaultdict
4
4
  from typing import Dict, Any, Optional, Set
5
+ from analysis.graph.entrypoint_detector import detect_entry_points
5
6
 
6
7
 
7
8
  def _kind_for_fqn(fqn: str, repo_prefix: str) -> str:
@@ -52,6 +53,7 @@ def _infer_repo_prefix(nodes: Set[str]) -> str:
52
53
  def compute_architecture_metrics(
53
54
  callgraph,
54
55
  symbol_index,
56
+ repo_dir: Optional[str] = None,
55
57
  repo_prefix: Optional[str] = None,
56
58
  top_k: int = 25,
57
59
  fanout_threshold: int = 10,
@@ -134,6 +136,8 @@ def compute_architecture_metrics(
134
136
  "edges": int(edges_per_file.get(fp, 0)),
135
137
  }
136
138
 
139
+ entry_points = detect_entry_points(repo_dir=repo_dir, repo_prefix=prefix) if repo_dir else []
140
+
137
141
  return {
138
142
  "ok": True,
139
143
  "repo_prefix": prefix,
@@ -142,6 +146,7 @@ def compute_architecture_metrics(
142
146
  "dead_symbols": sorted(dead_symbols),
143
147
  "orchestrators": sorted(orchestrators),
144
148
  "critical_symbols": sorted(critical),
149
+ "entry_points": entry_points,
145
150
  "top_fan_in": top_fan_in,
146
151
  "top_fan_out": top_fan_out,
147
152
  },
@@ -85,8 +85,11 @@ class FunctionCallVisitor(ast.NodeVisitor):
85
85
  def extract_function_calls(file_path):
86
86
  source = read_source_file(file_path)
87
87
  tree = parse_source_to_ast(source, file_path=file_path)
88
-
89
- visitor = FunctionCallVisitor(file_path)
90
- visitor.visit(tree)
91
-
92
- return visitor.calls
88
+ return extract_function_calls_from_tree(tree, file_path)
89
+
90
+
91
+ def extract_function_calls_from_tree(tree, file_path):
92
+ visitor = FunctionCallVisitor(file_path)
93
+ visitor.visit(tree)
94
+
95
+ return visitor.calls
@@ -3,43 +3,47 @@
3
3
 
4
4
  import ast
5
5
  from analysis.utils.bom_handler import read_source_file, parse_source_to_ast
6
-
7
-
6
+
7
+
8
+ def extract_imports_from_tree(tree, file_path):
9
+ """Extract imports from an already-parsed AST tree."""
10
+ imports = []
11
+
12
+ for node in ast.walk(tree):
13
+
14
+ # import module
15
+ if isinstance(node, ast.Import):
16
+ for alias in node.names:
17
+ imports.append({
18
+ "type": "import",
19
+ "module": alias.name,
20
+ "name": None,
21
+ "alias": alias.asname,
22
+ "line": node.lineno,
23
+ "file": file_path
24
+ })
25
+
26
+ # from module import name
27
+ elif isinstance(node, ast.ImportFrom):
28
+ module = node.module
29
+ level = node.level # 0 = absolute, >0 = relative
30
+
31
+ for alias in node.names:
32
+ imports.append({
33
+ "type": "from_import",
34
+ "module": module,
35
+ "name": alias.name,
36
+ "alias": alias.asname,
37
+ "level": level,
38
+ "line": node.lineno,
39
+ "file": file_path
40
+ })
41
+
42
+ return imports
43
+
44
+
8
45
  def extract_imports(file_path):
9
46
  """Extract imports from a Python file with automatic encoding and BOM handling."""
10
47
  source = read_source_file(file_path)
11
48
  tree = parse_source_to_ast(source, file_path=file_path)
12
-
13
- imports = []
14
-
15
- for node in ast.walk(tree):
16
-
17
- # import module
18
- if isinstance(node, ast.Import):
19
- for alias in node.names:
20
- imports.append({
21
- "type": "import",
22
- "module": alias.name,
23
- "name": None,
24
- "alias": alias.asname,
25
- "line": node.lineno,
26
- "file": file_path
27
- })
28
-
29
- # from module import name
30
- elif isinstance(node, ast.ImportFrom):
31
- module = node.module
32
- level = node.level # 0 = absolute, >0 = relative
33
-
34
- for alias in node.names:
35
- imports.append({
36
- "type": "from_import",
37
- "module": module,
38
- "name": alias.name,
39
- "alias": alias.asname,
40
- "level": level,
41
- "line": node.lineno,
42
- "file": file_path
43
- })
44
-
45
- return imports
49
+ return extract_imports_from_tree(tree, file_path)
@@ -7,22 +7,24 @@ from typing import Optional, Dict, Any
7
7
 
8
8
  import json
9
9
  import os
10
-
11
- from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
12
- from analysis.graph.callgraph_index import CallGraphIndex, CallSite
13
- from analysis.explain.docstring_extractor import extract_docstrings
14
- from analysis.explain.signature_extractor import extract_signatures
15
- from analysis.explain.return_analyzer import analyze_returns
16
- from analysis.explain.summary_generator import generate_symbol_summary
17
-
18
-
19
- def collect_python_files(root_dir: str):
20
- py_files = []
21
- for root, _, files in os.walk(root_dir):
22
- for file in files:
23
- if file.endswith(".py") and not file.startswith("__"):
24
- py_files.append(os.path.join(root, file))
25
- return py_files
10
+
11
+ from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
12
+ from analysis.graph.callgraph_index import CallGraphIndex, CallSite
13
+ from analysis.explain.docstring_extractor import extract_docstrings
14
+ from analysis.explain.signature_extractor import extract_signatures
15
+ from analysis.explain.return_analyzer import analyze_returns
16
+ from analysis.explain.summary_generator import generate_symbol_summary
17
+ from analysis.utils.repo_walk import filter_skipped_dirs
18
+
19
+
20
+ def collect_python_files(root_dir: str):
21
+ py_files = []
22
+ for root, dirs, files in os.walk(root_dir):
23
+ dirs[:] = filter_skipped_dirs(dirs)
24
+ for file in files:
25
+ if file.endswith(".py") and not file.startswith("__"):
26
+ py_files.append(os.path.join(root, file))
27
+ return py_files
26
28
 
27
29
 
28
30
  def parse_ast(file_path: str):
@@ -78,7 +80,11 @@ def merge_maps(dst: dict, src: dict):
78
80
  dst[k].update(src.get(k, {}))
79
81
 
80
82
 
81
- def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dict[str, Any]:
83
+ def run(
84
+ repo_dir: Optional[str] = None,
85
+ output_dir: Optional[str] = None,
86
+ symbol_snapshot: Optional[list] = None,
87
+ ) -> Dict[str, Any]:
82
88
  """
83
89
  Callable explain pipeline (Phase-5/6), suitable for CLI/VS Code.
84
90
 
@@ -114,23 +120,25 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dic
114
120
  # 2) Collect repo python files
115
121
  python_files = collect_python_files(repo_dir)
116
122
 
117
- # 3) Build symbol index + extractors across repo
118
- symbol_index = SymbolIndex()
119
-
120
- repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
121
- repo_signatures = {"functions": {}, "methods": {}}
122
- repo_returns = {"functions": {}, "methods": {}}
123
-
124
- for file_path in python_files:
125
- tree = parse_ast(file_path)
126
- module_path = file_to_module(file_path, repo_dir)
127
-
128
-
129
- # index symbols
130
- symbol_index.index_file(tree, module_path, file_path)
131
-
132
- # extract per-file and merge
133
- merge_maps(repo_docstrings, extract_docstrings(tree))
123
+ # 3) Build symbol index + extractors across repo
124
+ symbol_index = SymbolIndex()
125
+ loaded_snapshot = False
126
+ if isinstance(symbol_snapshot, list) and symbol_snapshot:
127
+ symbol_index.load_snapshot(symbol_snapshot)
128
+ loaded_snapshot = True
129
+
130
+ repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
131
+ repo_signatures = {"functions": {}, "methods": {}}
132
+ repo_returns = {"functions": {}, "methods": {}}
133
+
134
+ for file_path in python_files:
135
+ tree = parse_ast(file_path)
136
+ if not loaded_snapshot:
137
+ module_path = file_to_module(file_path, repo_dir)
138
+ symbol_index.index_file(tree, module_path, file_path)
139
+
140
+ # extract per-file and merge
141
+ merge_maps(repo_docstrings, extract_docstrings(tree))
134
142
 
135
143
  sigs = extract_signatures(tree)
136
144
  repo_signatures["functions"].update(sigs.get("functions", {}))
@@ -11,11 +11,14 @@ from analysis.indexing.symbol_index import SymbolInfo
11
11
  from analysis.graph.callgraph_index import CallGraphIndex
12
12
 
13
13
 
14
- def _first_line(text: Optional[str]) -> Optional[str]:
15
- if not text:
16
- return None
17
- line = text.strip().splitlines()[0].strip()
18
- return line or None
14
+ def _first_line(text: Optional[str]) -> Optional[str]:
15
+ if not text:
16
+ return None
17
+ stripped = text.strip()
18
+ if not stripped:
19
+ return None
20
+ line = stripped.splitlines()[0].strip()
21
+ return line or None
19
22
 
20
23
 
21
24
  def _humanize_name(name: str) -> str:
@@ -1 +1,217 @@
1
- # __main__, click/typer/argparse, etc.
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import os
5
+ from typing import Any, Dict, List, Optional, Set, Tuple
6
+
7
+ from analysis.utils.bom_handler import read_and_parse_python_file
8
+ from analysis.utils.repo_walk import filter_skipped_dirs
9
+
10
+
11
+ _ROUTE_DECORATORS = {
12
+ "get",
13
+ "post",
14
+ "put",
15
+ "delete",
16
+ "patch",
17
+ "options",
18
+ "head",
19
+ "websocket",
20
+ "route",
21
+ "api_route",
22
+ }
23
+
24
+ _CLI_DECORATOR_SUFFIXES = {
25
+ ".command",
26
+ ".group",
27
+ ".callback",
28
+ }
29
+
30
+
31
+ def _collect_python_files(repo_dir: str) -> List[str]:
32
+ py_files: List[str] = []
33
+ for root, dirs, files in os.walk(repo_dir):
34
+ dirs[:] = filter_skipped_dirs(dirs)
35
+ for file_name in files:
36
+ if not file_name.endswith(".py"):
37
+ continue
38
+ if file_name.startswith("__") and file_name != "__main__.py":
39
+ continue
40
+ py_files.append(os.path.join(root, file_name))
41
+ return sorted(py_files)
42
+
43
+
44
+ def _file_to_module(file_path: str, repo_root: str, repo_prefix: str) -> str:
45
+ rel = os.path.relpath(os.path.abspath(file_path), os.path.abspath(repo_root)).replace(os.sep, ".")
46
+ if rel.endswith(".py"):
47
+ rel = rel[:-3]
48
+ prefix = str(repo_prefix or os.path.basename(os.path.abspath(repo_root).rstrip("\\/"))).strip()
49
+ return f"{prefix}.{rel}" if prefix else rel
50
+
51
+
52
+ def _dotted_name(node: ast.AST) -> str:
53
+ if isinstance(node, ast.Call):
54
+ return _dotted_name(node.func)
55
+ if isinstance(node, ast.Name):
56
+ return node.id
57
+ if isinstance(node, ast.Attribute):
58
+ parent = _dotted_name(node.value)
59
+ return f"{parent}.{node.attr}" if parent else node.attr
60
+ return ""
61
+
62
+
63
+ def _str_constant(node: Optional[ast.AST]) -> str:
64
+ if isinstance(node, ast.Constant) and isinstance(node.value, str):
65
+ return node.value
66
+ return ""
67
+
68
+
69
+ def _main_guard(node: ast.AST) -> bool:
70
+ if not isinstance(node, ast.If):
71
+ return False
72
+ test = node.test
73
+ if not isinstance(test, ast.Compare) or len(test.ops) != 1 or len(test.comparators) != 1:
74
+ return False
75
+ left = test.left
76
+ right = test.comparators[0]
77
+ if not isinstance(test.ops[0], ast.Eq):
78
+ return False
79
+ return (
80
+ isinstance(left, ast.Name)
81
+ and left.id == "__name__"
82
+ and isinstance(right, ast.Constant)
83
+ and right.value == "__main__"
84
+ )
85
+
86
+
87
+ class _EntryPointVisitor(ast.NodeVisitor):
88
+ def __init__(self, module: str, file_path: str):
89
+ self.module = module
90
+ self.file_path = file_path
91
+ self.class_stack: List[str] = []
92
+ self.entries: List[Dict[str, Any]] = []
93
+ self._seen: Set[Tuple[str, str, int]] = set()
94
+
95
+ def _fqn_for(self, name: str) -> str:
96
+ if self.class_stack:
97
+ return f"{self.module}.{'.'.join(self.class_stack)}.{name}"
98
+ return f"{self.module}.{name}"
99
+
100
+ def _add_entry(
101
+ self,
102
+ *,
103
+ kind: str,
104
+ title: str,
105
+ reason: str,
106
+ line: int,
107
+ fqn: Optional[str] = None,
108
+ ) -> None:
109
+ key = (str(kind), str(fqn or title), int(line or 1))
110
+ if key in self._seen:
111
+ return
112
+ self._seen.add(key)
113
+ self.entries.append(
114
+ {
115
+ "kind": kind,
116
+ "title": title,
117
+ "reason": reason,
118
+ "fqn": fqn or "",
119
+ "file": self.file_path,
120
+ "line": int(line or 1),
121
+ }
122
+ )
123
+
124
+ def _record_function_entrypoints(self, node: ast.AST, name: str, decorators: List[ast.AST]) -> None:
125
+ fqn = self._fqn_for(name)
126
+ for decorator in decorators:
127
+ dotted = _dotted_name(decorator)
128
+ if not dotted:
129
+ continue
130
+ last = dotted.split(".")[-1].lower()
131
+ if last in _ROUTE_DECORATORS:
132
+ path = ""
133
+ if isinstance(decorator, ast.Call) and decorator.args:
134
+ path = _str_constant(decorator.args[0])
135
+ method_label = "Web route" if last in {"route", "api_route"} else last.upper()
136
+ title = f"{method_label} {path}".strip()
137
+ reason = "This function looks like a web request entry point."
138
+ self._add_entry(
139
+ kind="api_route",
140
+ title=title,
141
+ reason=reason,
142
+ line=getattr(node, "lineno", 1),
143
+ fqn=fqn,
144
+ )
145
+ if any(dotted.endswith(suffix) for suffix in _CLI_DECORATOR_SUFFIXES) or dotted.startswith("click.") or dotted.startswith("typer."):
146
+ self._add_entry(
147
+ kind="cli_command",
148
+ title=f"CLI command: {name}",
149
+ reason="This function looks like a command-line entry point.",
150
+ line=getattr(node, "lineno", 1),
151
+ fqn=fqn,
152
+ )
153
+
154
+ if not self.class_stack and name == "main":
155
+ self._add_entry(
156
+ kind="script_start",
157
+ title="Script start: main()",
158
+ reason="This is a common starting function for running the file directly.",
159
+ line=getattr(node, "lineno", 1),
160
+ fqn=fqn,
161
+ )
162
+
163
+ def visit_ClassDef(self, node: ast.ClassDef) -> None:
164
+ self.class_stack.append(node.name)
165
+ self.generic_visit(node)
166
+ self.class_stack.pop()
167
+
168
+ def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
169
+ self._record_function_entrypoints(node, node.name, list(node.decorator_list or []))
170
+ self.generic_visit(node)
171
+
172
+ def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
173
+ self._record_function_entrypoints(node, node.name, list(node.decorator_list or []))
174
+ self.generic_visit(node)
175
+
176
+ def visit_If(self, node: ast.If) -> None:
177
+ if _main_guard(node):
178
+ module_fqn = f"{self.module}.<module>"
179
+ self._add_entry(
180
+ kind="script_start",
181
+ title="Run this file directly",
182
+ reason="This file has a __main__ block, so it can start execution directly.",
183
+ line=getattr(node, "lineno", 1),
184
+ fqn=module_fqn,
185
+ )
186
+ for child in ast.walk(node):
187
+ if isinstance(child, ast.Call) and isinstance(child.func, ast.Name):
188
+ called_name = child.func.id
189
+ if called_name:
190
+ self._add_entry(
191
+ kind="script_start",
192
+ title=f"Script start: {called_name}()",
193
+ reason="This function is called from the file's __main__ block.",
194
+ line=getattr(child, "lineno", getattr(node, "lineno", 1)),
195
+ fqn=f"{self.module}.{called_name}",
196
+ )
197
+ self.generic_visit(node)
198
+
199
+
200
+ def detect_entry_points(repo_dir: str, repo_prefix: str = "") -> List[Dict[str, Any]]:
201
+ repo_root = os.path.abspath(repo_dir)
202
+ prefix = str(repo_prefix or os.path.basename(repo_root.rstrip("\\/"))).strip()
203
+ rows: List[Dict[str, Any]] = []
204
+
205
+ for file_path in _collect_python_files(repo_root):
206
+ try:
207
+ tree = read_and_parse_python_file(file_path)
208
+ except Exception:
209
+ continue
210
+ module = _file_to_module(file_path, repo_root, prefix)
211
+ visitor = _EntryPointVisitor(module=module, file_path=file_path)
212
+ visitor.visit(tree)
213
+ rows.extend(visitor.entries)
214
+
215
+ order = {"api_route": 0, "cli_command": 1, "script_start": 2}
216
+ rows.sort(key=lambda item: (order.get(str(item.get("kind", "")), 99), str(item.get("file", "")), int(item.get("line", 1))))
217
+ return rows[:50]
@@ -5,25 +5,25 @@ from typing import Optional, Dict, Any, List
5
5
 
6
6
  import os
7
7
  import json
8
- from analysis.indexing.symbol_index import SymbolIndex
9
- from analysis.indexing.import_resolver import ImportResolver
10
- from analysis.call_graph.cross_file_resolver import CrossFileResolver
11
- from analysis.call_graph.call_extractor import extract_function_calls
12
- from analysis.core.import_extractor import extract_imports
13
- from analysis.graph.callgraph_index import build_caller_fqn
8
+ from analysis.indexing.symbol_index import SymbolIndex
9
+ from analysis.indexing.import_resolver import ImportResolver
10
+ from analysis.call_graph.cross_file_resolver import CrossFileResolver
11
+ from analysis.call_graph.call_extractor import extract_function_calls_from_tree
12
+ from analysis.core.import_extractor import extract_imports_from_tree
13
+ from analysis.graph.callgraph_index import build_caller_fqn
14
+ from analysis.utils.repo_walk import filter_skipped_dirs
14
15
 
15
16
 
16
17
  PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
17
18
 
18
19
 
19
- def collect_python_files(root_dir: str) -> List[str]:
20
- ignore_dirs = {".git", "__pycache__", ".codemap_cache", "node_modules", ".venv", "venv"}
21
- py_files: List[str] = []
22
- for root, dirs, files in os.walk(root_dir):
23
- dirs[:] = [d for d in dirs if d not in ignore_dirs]
24
- for file in files:
25
- if file.endswith(".py") and not file.startswith("__"):
26
- py_files.append(os.path.join(root, file))
20
+ def collect_python_files(root_dir: str) -> List[str]:
21
+ py_files: List[str] = []
22
+ for root, dirs, files in os.walk(root_dir):
23
+ dirs[:] = filter_skipped_dirs(dirs)
24
+ for file in files:
25
+ if file.endswith(".py") and not file.startswith("__"):
26
+ py_files.append(os.path.join(root, file))
27
27
  return py_files
28
28
 
29
29
 
@@ -75,25 +75,27 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None, force_
75
75
 
76
76
  os.makedirs(output_dir, exist_ok=True)
77
77
 
78
- python_files = collect_python_files(repo_dir)
79
- symbol_index = SymbolIndex()
80
- file_module_map: Dict[str, str] = {}
81
-
82
- for file_path in python_files:
83
- module_path = file_to_module(file_path, repo_dir)
84
- file_module_map[file_path] = module_path
85
- tree = parse_ast(file_path)
86
- symbol_index.index_file(tree, module_path, file_path)
87
-
88
- import_resolver = ImportResolver(symbol_index)
89
- for file_path in python_files:
90
- module_path = file_module_map[file_path]
91
- imports = extract_imports(file_path)
92
- import_resolver.index_module_imports(module_path, imports)
93
-
94
- all_calls = []
95
- for file_path in python_files:
96
- all_calls.extend(extract_function_calls(file_path))
78
+ python_files = collect_python_files(repo_dir)
79
+ symbol_index = SymbolIndex()
80
+ file_module_map: Dict[str, str] = {}
81
+ parsed_trees: Dict[str, Any] = {}
82
+
83
+ for file_path in python_files:
84
+ module_path = file_to_module(file_path, repo_dir)
85
+ file_module_map[file_path] = module_path
86
+ tree = parse_ast(file_path)
87
+ parsed_trees[file_path] = tree
88
+ symbol_index.index_file(tree, module_path, file_path)
89
+
90
+ import_resolver = ImportResolver(symbol_index)
91
+ for file_path in python_files:
92
+ module_path = file_module_map[file_path]
93
+ imports = extract_imports_from_tree(parsed_trees[file_path], file_path)
94
+ import_resolver.index_module_imports(module_path, imports)
95
+
96
+ all_calls = []
97
+ for file_path in python_files:
98
+ all_calls.extend(extract_function_calls_from_tree(parsed_trees[file_path], file_path))
97
99
 
98
100
  cross_resolver = CrossFileResolver(symbol_index, import_resolver)
99
101
  resolved_calls = []
@@ -5,15 +5,15 @@ import json
5
5
  import os
6
6
  import shutil
7
7
  import tempfile
8
- from datetime import datetime, timezone
9
- from threading import RLock
10
- from typing import Any, Dict, List, Optional, Tuple
11
-
12
- from security_utils import redact_secrets
13
-
14
- _LOCK = RLock()
15
- _SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
16
- _SKIP_DIRS = {".git", "__pycache__", ".codemap_cache", ".venv", "venv", "node_modules"}
8
+ from datetime import datetime, timezone
9
+ from threading import RLock
10
+ from typing import Any, Dict, List, Optional, Tuple
11
+
12
+ from analysis.utils.repo_walk import filter_skipped_dirs
13
+ from security_utils import redact_secrets
14
+
15
+ _LOCK = RLock()
16
+ _SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
17
17
 
18
18
 
19
19
  def _project_root() -> str:
@@ -195,14 +195,14 @@ def save_policy(policy: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[
195
195
 
196
196
  def collect_fingerprints(repo_dir: str) -> Dict[str, Dict[str, int]]:
197
197
  repo_root = os.path.abspath(repo_dir)
198
- out: Dict[str, Dict[str, int]] = {}
199
- if not os.path.isdir(repo_root):
200
- return out
201
- for root, dirs, files in os.walk(repo_root):
202
- dirs[:] = [d for d in dirs if d not in _SKIP_DIRS]
203
- for name in files:
204
- if not name.endswith(".py"):
205
- continue
198
+ out: Dict[str, Dict[str, int]] = {}
199
+ if not os.path.isdir(repo_root):
200
+ return out
201
+ for root, dirs, files in os.walk(repo_root):
202
+ dirs[:] = filter_skipped_dirs(dirs)
203
+ for name in files:
204
+ if not name.endswith(".py"):
205
+ continue
206
206
  fp = os.path.join(root, name)
207
207
  try:
208
208
  st = os.stat(fp)
@@ -250,16 +250,21 @@ def save_manifest(repo_dir: str, manifest: Dict[str, Any], base_dir: Optional[st
250
250
  _atomic_json_write(_manifest_path(repo_dir, base_dir), _scrub_payload(payload))
251
251
 
252
252
 
253
- def should_rebuild(repo_dir: str, analysis_version: str = "2.2", base_dir: Optional[str] = None) -> bool:
253
+ def should_rebuild(
254
+ repo_dir: str,
255
+ analysis_version: str = "2.2",
256
+ base_dir: Optional[str] = None,
257
+ current_fingerprints: Optional[Dict[str, Any]] = None,
258
+ ) -> bool:
254
259
  manifest = load_manifest(repo_dir, base_dir=base_dir)
255
260
  if not manifest:
256
261
  return True
257
262
  if str(manifest.get("analysis_version", "") or "") != str(analysis_version or ""):
258
263
  return True
259
264
  previous = manifest.get("fingerprints", {}) if isinstance(manifest.get("fingerprints"), dict) else {}
260
- current = collect_fingerprints(repo_dir)
261
- delta = diff_fingerprints(previous, current)
262
- return bool(delta.get("changed_count", 0))
265
+ current = current_fingerprints if isinstance(current_fingerprints, dict) else collect_fingerprints(repo_dir)
266
+ delta = diff_fingerprints(previous, current)
267
+ return bool(delta.get("changed_count", 0))
263
268
 
264
269
 
265
270
  def _default_metadata(repo_hash: str) -> Dict[str, Any]:
@@ -0,0 +1,27 @@
1
+ """Shared repository walking rules for source analysis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Iterable
6
+
7
+
8
+ SKIP_DIR_NAMES = {
9
+ ".git",
10
+ "__pycache__",
11
+ ".codemap_cache",
12
+ ".venv",
13
+ "venv",
14
+ "env",
15
+ "ENV",
16
+ ".env",
17
+ "node_modules",
18
+ "site-packages",
19
+ "dist-packages",
20
+ ".tox",
21
+ ".nox",
22
+ }
23
+
24
+
25
+ def filter_skipped_dirs(dir_names: Iterable[str]) -> list[str]:
26
+ """Return directory names that should still be traversed."""
27
+ return [name for name in dir_names if name not in SKIP_DIR_NAMES]