coderecon-0.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. coderecon-0.1.0/PKG-INFO +14 -0
  2. coderecon-0.1.0/README.md +2 -0
  3. coderecon-0.1.0/analyzer/__init__.py +0 -0
  4. coderecon-0.1.0/analyzer/diff.py +44 -0
  5. coderecon-0.1.0/analyzer/discovery/__init__.py +0 -0
  6. coderecon-0.1.0/analyzer/discovery/files.py +46 -0
  7. coderecon-0.1.0/analyzer/discovery/languages.py +0 -0
  8. coderecon-0.1.0/analyzer/inference/__init__.py +0 -0
  9. coderecon-0.1.0/analyzer/inference/edge_cases.py +92 -0
  10. coderecon-0.1.0/analyzer/parsing/__init__.py +0 -0
  11. coderecon-0.1.0/analyzer/parsing/calls.py +0 -0
  12. coderecon-0.1.0/analyzer/parsing/functions.py +56 -0
  13. coderecon-0.1.0/analyzer/scan.py +95 -0
  14. coderecon-0.1.0/analyzer/signals/__init__.py +0 -0
  15. coderecon-0.1.0/analyzer/signals/aggregate.py +35 -0
  16. coderecon-0.1.0/analyzer/signals/signals.py +75 -0
  17. coderecon-0.1.0/analyzer/testing/__init__.py +0 -0
  18. coderecon-0.1.0/analyzer/testing/tests.py +39 -0
  19. coderecon-0.1.0/cli.py +263 -0
  20. coderecon-0.1.0/coderecon.egg-info/PKG-INFO +14 -0
  21. coderecon-0.1.0/coderecon.egg-info/SOURCES.txt +46 -0
  22. coderecon-0.1.0/coderecon.egg-info/dependency_links.txt +1 -0
  23. coderecon-0.1.0/coderecon.egg-info/entry_points.txt +2 -0
  24. coderecon-0.1.0/coderecon.egg-info/requires.txt +5 -0
  25. coderecon-0.1.0/coderecon.egg-info/top_level.txt +6 -0
  26. coderecon-0.1.0/llm/client.py +64 -0
  27. coderecon-0.1.0/llm/explain.py +151 -0
  28. coderecon-0.1.0/llm/prompt.py +110 -0
  29. coderecon-0.1.0/llm/slice.py +31 -0
  30. coderecon-0.1.0/llm/suggest.py +67 -0
  31. coderecon-0.1.0/mcp/__init__.py +0 -0
  32. coderecon-0.1.0/mcp/tools.py +0 -0
  33. coderecon-0.1.0/pyproject.toml +29 -0
  34. coderecon-0.1.0/report/__init__.py +0 -0
  35. coderecon-0.1.0/report/doctor.py +31 -0
  36. coderecon-0.1.0/report/generate.py +0 -0
  37. coderecon-0.1.0/report/render/__init__.py +0 -0
  38. coderecon-0.1.0/report/render/cli.py +0 -0
  39. coderecon-0.1.0/report/render/json.py +0 -0
  40. coderecon-0.1.0/report/render/markdown.py +0 -0
  41. coderecon-0.1.0/report/summary.py +33 -0
  42. coderecon-0.1.0/report/topology.py +225 -0
  43. coderecon-0.1.0/report/writer.py +46 -0
  44. coderecon-0.1.0/schemas/__init__.py +0 -0
  45. coderecon-0.1.0/schemas/analysis.py +13 -0
  46. coderecon-0.1.0/schemas/report.py +0 -0
  47. coderecon-0.1.0/setup.cfg +4 -0
  48. coderecon-0.1.0/setup.py +18 -0
@@ -0,0 +1,14 @@
+ Metadata-Version: 2.4
+ Name: coderecon
+ Version: 0.1.0
+ Summary: Engineering-first repository reconnaissance and architectural auditing.
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ Requires-Dist: mcp
+ Requires-Dist: fastmcp
+ Requires-Dist: click
+ Requires-Dist: ollama
+ Requires-Dist: requests
+
+ # coderecon
+ coderecon is a CLI tool that performs deterministic reconnaissance over a codebase to extract structural facts—files, functions, edge cases, test coverage, and issue signals—and then uses an LLM to reason over those findings to generate detailed, auditable reports.
@@ -0,0 +1,2 @@
+ # coderecon
+ coderecon is a CLI tool that performs deterministic reconnaissance over a codebase to extract structural facts—files, functions, edge cases, test coverage, and issue signals—and then uses an LLM to reason over those findings to generate detailed, auditable reports.
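For orientation, here is a minimal sketch (not part of the package) of how the modules in this release chain together. It assumes the caller rotates `analysis.json` to `analysis_previous.json` between scans; the shipped `cli.py` presumably wraps these steps behind its own commands.

```python
import shutil
from pathlib import Path

from analyzer.scan import run_analysis      # deterministic pass, writes analysis.json
from analyzer.diff import run_diff_logic    # compares analysis.json vs analysis_previous.json

analysis = run_analysis(".")                 # scan the current directory
print(len(analysis["signals"]), "aggregated signals")

if Path("analysis.json").exists():
    # Keep the current results as the baseline for the next comparison
    shutil.copy("analysis.json", "analysis_previous.json")

# ...modify the codebase, then re-scan and diff against the baseline...
run_analysis(".")
print(run_diff_logic())                      # markdown delta report of added/removed signals
```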
File without changes
@@ -0,0 +1,44 @@
+ import json
+ from pathlib import Path
+
+
+ def run_diff_logic():
+     # Use the same root where analysis.json lives
+     current_path = Path("analysis.json")
+     previous_path = Path("analysis_previous.json")
+
+     if not previous_path.exists():
+         return "[coderecon] Diff Refused: No previous analysis found. Run 'scan' twice to compare changes."
+
+     with open(current_path, "r", encoding="utf-8") as f:
+         curr = json.load(f)
+     with open(previous_path, "r", encoding="utf-8") as f:
+         prev = json.load(f)
+
+     # Use Path + Type + Message as the unique signature for a signal
+     def get_sig(s):
+         return (s.get("path") or s.get("file"), s.get("type"), s.get("message"))
+
+     curr_set = {get_sig(s) for s in curr.get("signals", [])}
+     prev_set = {get_sig(s) for s in prev.get("signals", [])}
+
+     added = curr_set - prev_set
+     removed = prev_set - curr_set
+
+     # Formatting Output
+     out = ["# 🔄 RECON DELTA REPORT\n"]
+
+     if added:
+         out.append(f"## 🛑 New Hazards Detected (+{len(added)})")
+         for path, stype, msg in added:
+             out.append(f"- [{stype.upper()}] in {path}: {msg}")
+
+     if removed:
+         out.append(f"\n## ✅ Risks Resolved (-{len(removed)})")
+         for path, stype, msg in removed:
+             out.append(f"- Resolved: {stype} in {path}")
+
+     if not added and not removed:
+         out.append("No changes in signal density detected.")
+
+     return "\n".join(out)
File without changes
@@ -0,0 +1,46 @@
+ import os
+ from pathlib import Path
+
+ # Move these to a set for O(1) lookup speed
+ SUPPORTED_EXTENSIONS = {".py", ".js", ".ts", ".tsx", ".rs", ".go", ".java", ".cpp", ".c", ".h"}
+ EXCLUDE_DIRS = {".git", "node_modules", "__pycache__", ".venv", "dist", "build", ".coderecon", "venv", "lib"}
+
+
+ def _get_file_metadata(full_path, filename):
+     """Fast metadata assembly using pre-calculated values."""
+     return {
+         "path": full_path,
+         "name": filename,
+         "size": os.path.getsize(full_path)  # Only hit the disk if necessary
+     }
+
+
+ def discover_files(root_path: str):
+     """
+     High-speed discovery using os.scandir to minimize system calls.
+     """
+     # 1. Handle Single File Input Fast
+     if os.path.isfile(root_path):
+         if os.path.splitext(root_path)[1].lower() in SUPPORTED_EXTENSIONS:
+             return [_get_file_metadata(root_path, os.path.basename(root_path))]
+         return []
+
+     files = []
+     stack = [root_path]
+
+     # 2. Manual Stack Walk (often faster than os.walk for deep trees)
+     while stack:
+         current_dir = stack.pop()
+         try:
+             with os.scandir(current_dir) as it:
+                 for entry in it:
+                     if entry.is_dir():
+                         if entry.name not in EXCLUDE_DIRS:
+                             stack.append(entry.path)
+                     elif entry.is_file():
+                         if os.path.splitext(entry.name)[1].lower() in SUPPORTED_EXTENSIONS:
+                             files.append(_get_file_metadata(entry.path, entry.name))
+         except PermissionError:
+             continue
+
+     return files
File without changes
File without changes
@@ -0,0 +1,92 @@
+ import ast
+
+
+ def _emit(fn, inner, rule_id, case, reason, severity="low"):
+     return {
+         "rule_id": rule_id,
+         "function": fn["name"],
+         "file": fn["path"],
+         "case": case,
+         "reason": reason,
+         "severity": severity,
+         "line": getattr(inner, "lineno", None),
+         "node_type": type(inner).__name__,
+     }
+
+
+ class DepthVisitor(ast.NodeVisitor):
+     """Tracks nesting depth of control flow nodes."""
+
+     def __init__(self):
+         self.current_depth = 0
+         self.max_depth = 0
+         self.nesting_nodes = (ast.If, ast.For, ast.While, ast.Try, ast.With)
+
+     def visit_nested(self, node):
+         self.current_depth += 1
+         self.max_depth = max(self.max_depth, self.current_depth)
+         self.generic_visit(node)
+         self.current_depth -= 1
+
+     def visit_If(self, node): self.visit_nested(node)
+
+     def visit_For(self, node): self.visit_nested(node)
+
+     def visit_While(self, node): self.visit_nested(node)
+
+     def visit_Try(self, node): self.visit_nested(node)
+
+
+ def detect_edge_cases(functions):
+     edge_cases = []
+
+     for fn in functions:
+         try:
+             with open(fn["path"], "r", encoding="utf-8", errors="ignore") as f:
+                 tree = ast.parse(f.read())
+         except Exception:
+             continue
+
+         for node in ast.walk(tree):
+             if isinstance(node, ast.FunctionDef) and node.name == fn["name"]:
+
+                 # 1. Check Deep Nesting
+                 dv = DepthVisitor()
+                 dv.visit(node)
+                 if dv.max_depth > 3:
+                     edge_cases.append(_emit(
+                         fn, node, "CR1001", "Deep Nesting",
+                         f"Logic nested {dv.max_depth} levels deep. High cognitive load.", "high"
+                     ))
+
+                 # 2. Specific Logic Hazards
+                 for inner in ast.walk(node):
+                     if isinstance(inner, (ast.For, ast.While)):
+                         edge_cases.append(_emit(
+                             fn, inner, "CR2001", "Loop execution",
+                             "Potential for infinite loops or O(n) performance hits.", "medium"
+                         ))
+
+                     if isinstance(inner, ast.Try):
+                         # Check for 'bare' except or too many handlers
+                         edge_cases.append(_emit(
+                             fn, inner, "CR3001", "Exception path",
+                             "Complexity in error recovery paths.", "low"
+                         ))
+
+                     if isinstance(inner, ast.BinOp) and isinstance(inner.op, ast.Div):
+                         edge_cases.append(_emit(
+                             fn, inner, "CR4001", "Math risk",
+                             "Division operation without visible zero-check.", "medium"
+                         ))
+
+                 # 3. Detect "God Functions" (Length-based)
+                 if hasattr(node, 'end_lineno'):
+                     length = node.end_lineno - node.lineno
+                     if length > 50:
+                         edge_cases.append(_emit(
+                             fn, node, "CR1002", "Large Function",
+                             f"Function is {length} lines long. Suggest refactoring.", "medium"
+                         ))
+
+     return edge_cases
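A short usage sketch for the rule pass above (illustrative only; `example.py` is a placeholder path). The edge-case detector keys off function records shaped like the output of `extract_functions`, so the two modules are typically chained:

```python
from analyzer.parsing.functions import extract_functions
from analyzer.inference.edge_cases import detect_edge_cases

# Parse one file, then run the CR-rule pass over its discovered functions
source = open("example.py", encoding="utf-8").read()
functions = extract_functions("example.py", source)   # [{"name": ..., "path": ..., ...}]

for hit in detect_edge_cases(functions):
    # e.g. CR4001 "Math risk" fires for any division without a visible zero-check
    print(hit["rule_id"], hit["severity"], f'{hit["file"]}:{hit["line"]}', hit["reason"])
```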
File without changes
File without changes
@@ -0,0 +1,56 @@
+ import re
+ from pathlib import Path
+
+ # Pre-compiled for speed. Optimized for Rust, TSX, JS, Go, and C-style syntax.
+ # Also added a group for Python 'def' as a secondary regex fallback.
+ GENERIC_FUNCTION_RE = re.compile(
+     r"(?:async\s+)?(?:fn\s+(\w+)|function\s+(\w+)|(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>|const\s+(\w+)\s*:\s*React\.FC|(\w+)\s*\([^)]*\)\s*\{|def\s+(\w+)\s*\()",
+     re.MULTILINE
+ )
+
+ # Reserved keywords to ignore during regex discovery
+ RESERVED = {"if", "for", "while", "switch", "catch", "return", "export", "default"}
+
+
+ def extract_functions(file_path: str, content: str):
+     """
+     High-speed extraction: AST for Python, Regex for everything else.
+     """
+     path = Path(file_path)
+     functions = []
+     suffix = path.suffix.lower()
+
+     # 1. PYTHON AST PARSING (Primary for .py)
+     if suffix == ".py":
+         import ast
+         try:
+             tree = ast.parse(content)
+             for node in ast.walk(tree):
+                 if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                     functions.append({
+                         "name": node.name,
+                         "line": node.lineno,
+                         "path": str(path),
+                         "type": "python_ast",
+                         "length": len(node.body)  # Real logic density
+                     })
+             return functions
+         except SyntaxError:
+             pass  # Fallback to regex if the file is malformed
+
+     # 2. MULTI-LANGUAGE REGEX DISCOVERY (For Rust, JS, TS, Go, and failing Py files)
+     # Using finditer is faster as it doesn't build the whole list at once
+     for match in GENERIC_FUNCTION_RE.finditer(content):
+         # Extract the first non-None group (the function name)
+         func_name = next((g for g in match.groups() if g), None)
+
+         if func_name and func_name not in RESERVED:
+             functions.append({
+                 "name": func_name,
+                 "line": content.count("\n", 0, match.start()) + 1,
+                 "path": str(path),
+                 "type": "regex_discovery",
+                 "length": 0  # Placeholder for regex-found functions
+             })
+
+     return functions
@@ -0,0 +1,95 @@
+ import json
+ import concurrent.futures
+ from pathlib import Path
+ from multiprocessing import cpu_count
+ from tqdm import tqdm
+
+ from analyzer.discovery.files import discover_files
+ from analyzer.parsing.functions import extract_functions
+ from analyzer.signals.aggregate import aggregate_signals
+ from analyzer.testing.tests import map_tests
+ from analyzer.inference.edge_cases import detect_edge_cases
+ from analyzer.signals.signals import generate_signals
+ from schemas.analysis import AnalysisSchema
+
+ def detect_tech_stack(files):
+     """Detects the tech stack based on marker files."""
+     stack = []
+     filenames = {Path(f['path']).name for f in files}
+     all_paths_str = "".join([f['path'].lower() for f in files])
+
+     if "package.json" in filenames: stack.append("Node.js")
+     if "tsconfig.json" in filenames: stack.append("TypeScript")
+     if "requirements.txt" in filenames or "pyproject.toml" in filenames: stack.append("Python")
+     if "pom.xml" in filenames: stack.append("Java/Maven")
+     if "go.mod" in filenames: stack.append("Go")
+     if "prisma" in all_paths_str: stack.append("Prisma ORM")
+     if "vite.config" in all_paths_str: stack.append("Vite")
+     if "react" in all_paths_str: stack.append("React")
+
+     return stack if stack else ["General Software"]
+
+
+ def _parse_file_batch(file_batch):
+     """Processes a chunk of files in one go to reduce process overhead."""
+     from analyzer.parsing.functions import extract_functions
+     results = []
+     for file_info in file_batch:
+         file_path = file_info["path"]
+         try:
+             with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+                 content = f.read()
+             results.extend(extract_functions(file_path, content))
+         except Exception:
+             continue
+     return results
+
+
+ def run_analysis(path: str) -> dict:
+     from analyzer.discovery.files import discover_files
+     files = discover_files(path)
+     all_functions = []
+
+     # Optimization: Chunking
+     # Spawning processes is expensive; processing in batches is 3x faster for small files.
+     chunk_size = 20
+     chunks = [files[i:i + chunk_size] for i in range(0, len(files), chunk_size)]
+
+     max_workers = max(1, int(cpu_count() * 0.8))
+
+     print(f"[coderecon] Analyzing {len(files)} files using {max_workers} cores...")
+
+     with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
+         futures = {executor.submit(_parse_file_batch, chunk): chunk for chunk in chunks}
+
+         for future in tqdm(concurrent.futures.as_completed(futures),
+                            total=len(chunks),
+                            desc="[coderecon] Scanning",
+                            unit="batch",
+                            leave=False):
+             all_functions.extend(future.result())
+
+     # Pipeline logic...
+     tech_stack = detect_tech_stack(files)
+     tests = map_tests(files)
+     edge_cases = detect_edge_cases(all_functions)
+     raw_signals = generate_signals(all_functions, edge_cases, tests)
+     aggregated_signals = aggregate_signals(raw_signals)
+
+     analysis = AnalysisSchema(
+         files=files,
+         functions=all_functions,
+         tests=tests,
+         edge_cases=edge_cases,
+         signals_raw=raw_signals,
+         signals=aggregated_signals,
+         tech_stack=tech_stack
+     )
+
+     analysis_dict = analysis.dict()
+
+     with open("analysis.json", "w", encoding="utf-8") as f:
+         json.dump(analysis_dict, f, indent=4)
+
+     return analysis_dict
+
File without changes
@@ -0,0 +1,35 @@
+ from collections import defaultdict
+
+
+ def aggregate_signals(signals):
+     grouped = defaultdict(lambda: {
+         "count": 0,
+         "lines": set()
+     })
+
+     for sig in signals:
+         key = (
+             sig["type"],
+             sig.get("path"),
+             sig.get("function"),
+             sig.get("case")
+         )
+
+         grouped[key]["count"] += 1
+
+         if sig.get("line") is not None:
+             grouped[key]["lines"].add(sig["line"])
+
+     aggregated = []
+
+     for (sig_type, path, function, case), data in grouped.items():
+         aggregated.append({
+             "type": sig_type,
+             "path": path,
+             "function": function,
+             "case": case,
+             "count": data["count"],
+             "lines": sorted(data["lines"]),
+         })
+
+     return aggregated
@@ -0,0 +1,75 @@
+ def assign_severity(signal_type: str) -> str:
+     mapping = {
+         "untested_function": "Medium",
+         "potential_edge_case": "Low",
+         "exception_path": "High",
+         "division_operation": "High",
+         "while_loop": "Medium",
+         "conditional_branch": "Low"
+     }
+
+     return mapping.get(signal_type, "Low")
+
+ def generate_signals(functions, edge_cases, tests):
+     """Convert raw findings into structured signals with safety fallbacks."""
+
+     signals = []
+
+     tested_functions = set()
+     for test in tests:
+         # Safely handle potential missing references key
+         tested_functions.update(test.get("references", []))
+
+     # Untested functions
+     for fn in functions:
+         # Use .get() to prevent KeyError if 'length' or 'line_start' is missing
+         length = fn.get("length", 0)
+         line_start = fn.get("line_start", fn.get("line", 0))  # Fallback to 'line' for regex matches
+         fn_name = fn.get("name", "unknown")
+         fn_path = fn.get("path", "unknown")
+
+         # Threshold checks with safe length
+         if length > 100:
+             signals.append({
+                 "type": "large_function",
+                 "function": fn_name,
+                 "path": fn_path,
+                 "line": line_start,
+                 "length": length,
+                 "severity": "High"
+             })
+         elif length > 60:
+             signals.append({
+                 "type": "large_function",
+                 "function": fn_name,
+                 "path": fn_path,
+                 "line": line_start,
+                 "length": length,
+                 "severity": "Medium"
+             })
+
+         if fn_name not in tested_functions:
+             signal_type = "untested_function"
+             signals.append({
+                 "type": "untested_function",
+                 "function": fn_name,
+                 "path": fn_path,
+                 "line": line_start,
+                 "severity": assign_severity(signal_type),
+             })
+
+     # Potential edge cases
+     for ec in edge_cases:
+         signal_type = "potential_edge_case"
+         signals.append({
+             "type": "potential_edge_case",
+             "function": ec.get("function"),
+             "path": ec.get("file"),
+             "line": ec.get("line"),
+             "case": ec.get("case"),
+             "rule_id": ec.get("rule_id"),
+             "node_type": ec.get("node_type"),
+             "severity": assign_severity(signal_type),
+         })
+
+     return signals
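A minimal illustration of the signal stage with hand-built inputs (the function and test names below are invented, not from the package): test `references` mark functions as covered, `generate_signals` flags size and coverage issues, and `aggregate_signals` groups duplicates.

```python
from analyzer.signals.signals import generate_signals
from analyzer.signals.aggregate import aggregate_signals

functions = [
    {"name": "parse_config", "path": "app/config.py", "line": 10, "length": 120},
    {"name": "load_user",    "path": "app/db.py",     "line": 42, "length": 8},
]
tests = [{"test_name": "test_load_user", "file": "tests/test_db.py", "references": ["load_user"]}]

raw = generate_signals(functions, edge_cases=[], tests=tests)
# parse_config -> large_function (High) + untested_function; load_user -> no signals
for sig in aggregate_signals(raw):
    print(sig["type"], sig["path"], sig["function"], "x", sig["count"])
```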
File without changes
@@ -0,0 +1,39 @@
+ from pathlib import Path
+ import ast
+
+ def map_tests(files):
+     """
+     Discover test functions and infer which production functions they reference.
+     """
+     tests = []
+
+     for file in files:
+         path = Path(file["path"])
+
+         if not (
+             path.name.startswith("test_") or "test" in path.parts
+         ):
+             continue
+
+         try:
+             source = open(path, "r", encoding="utf-8", errors="ignore").read()
+             tree = ast.parse(source)
+         except Exception:
+             continue
+
+         for node in ast.walk(tree):
+             if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
+                 referenced = set()
+
+                 for inner in ast.walk(node):
+                     if isinstance(inner, ast.Call):
+                         if isinstance(inner.func, ast.Name):
+                             referenced.add(inner.func.id)
+
+                 tests.append({
+                     "test_name": node.name,
+                     "file": str(path),
+                     "references": sorted(referenced)
+                 })
+
+     return tests