coderecon-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coderecon-0.1.0/PKG-INFO +14 -0
- coderecon-0.1.0/README.md +2 -0
- coderecon-0.1.0/analyzer/__init__.py +0 -0
- coderecon-0.1.0/analyzer/diff.py +44 -0
- coderecon-0.1.0/analyzer/discovery/__init__.py +0 -0
- coderecon-0.1.0/analyzer/discovery/files.py +46 -0
- coderecon-0.1.0/analyzer/discovery/languages.py +0 -0
- coderecon-0.1.0/analyzer/inference/__init__.py +0 -0
- coderecon-0.1.0/analyzer/inference/edge_cases.py +92 -0
- coderecon-0.1.0/analyzer/parsing/__init__.py +0 -0
- coderecon-0.1.0/analyzer/parsing/calls.py +0 -0
- coderecon-0.1.0/analyzer/parsing/functions.py +56 -0
- coderecon-0.1.0/analyzer/scan.py +95 -0
- coderecon-0.1.0/analyzer/signals/__init__.py +0 -0
- coderecon-0.1.0/analyzer/signals/aggregate.py +35 -0
- coderecon-0.1.0/analyzer/signals/signals.py +75 -0
- coderecon-0.1.0/analyzer/testing/__init__.py +0 -0
- coderecon-0.1.0/analyzer/testing/tests.py +39 -0
- coderecon-0.1.0/cli.py +263 -0
- coderecon-0.1.0/coderecon.egg-info/PKG-INFO +14 -0
- coderecon-0.1.0/coderecon.egg-info/SOURCES.txt +46 -0
- coderecon-0.1.0/coderecon.egg-info/dependency_links.txt +1 -0
- coderecon-0.1.0/coderecon.egg-info/entry_points.txt +2 -0
- coderecon-0.1.0/coderecon.egg-info/requires.txt +5 -0
- coderecon-0.1.0/coderecon.egg-info/top_level.txt +6 -0
- coderecon-0.1.0/llm/client.py +64 -0
- coderecon-0.1.0/llm/explain.py +151 -0
- coderecon-0.1.0/llm/prompt.py +110 -0
- coderecon-0.1.0/llm/slice.py +31 -0
- coderecon-0.1.0/llm/suggest.py +67 -0
- coderecon-0.1.0/mcp/__init__.py +0 -0
- coderecon-0.1.0/mcp/tools.py +0 -0
- coderecon-0.1.0/pyproject.toml +29 -0
- coderecon-0.1.0/report/__init__.py +0 -0
- coderecon-0.1.0/report/doctor.py +31 -0
- coderecon-0.1.0/report/generate.py +0 -0
- coderecon-0.1.0/report/render/__init__.py +0 -0
- coderecon-0.1.0/report/render/cli.py +0 -0
- coderecon-0.1.0/report/render/json.py +0 -0
- coderecon-0.1.0/report/render/markdown.py +0 -0
- coderecon-0.1.0/report/summary.py +33 -0
- coderecon-0.1.0/report/topology.py +225 -0
- coderecon-0.1.0/report/writer.py +46 -0
- coderecon-0.1.0/schemas/__init__.py +0 -0
- coderecon-0.1.0/schemas/analysis.py +13 -0
- coderecon-0.1.0/schemas/report.py +0 -0
- coderecon-0.1.0/setup.cfg +4 -0
- coderecon-0.1.0/setup.py +18 -0
coderecon-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,14 @@
+Metadata-Version: 2.4
+Name: coderecon
+Version: 0.1.0
+Summary: Engineering-first repository reconnaissance and architectural auditing.
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: mcp
+Requires-Dist: fastmcp
+Requires-Dist: click
+Requires-Dist: ollama
+Requires-Dist: requests
+
+# coderecon
+coderecon is a CLI tool that performs deterministic reconnaissance over a codebase to extract structural facts—files, functions, edge cases, test coverage, and issue signals—and then uses an LLM to reason over those findings to generate detailed, auditable reports.

coderecon-0.1.0/README.md
ADDED
@@ -0,0 +1,2 @@
+# coderecon
+coderecon is a CLI tool that performs deterministic reconnaissance over a codebase to extract structural facts—files, functions, edge cases, test coverage, and issue signals—and then uses an LLM to reason over those findings to generate detailed, auditable reports.
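
To make the "deterministic reconnaissance" phase of that description concrete, here is a minimal sketch that chains the analyzer modules listed in the file summary above (their sources appear later in this diff). Module paths are taken from the import statements in analyzer/scan.py; the LLM reporting phase (the llm/ package) is not expanded in this section and is not shown here.

# Hypothetical sketch: the deterministic half of the pipeline, composed by hand.
from analyzer.discovery.files import discover_files
from analyzer.parsing.functions import extract_functions
from analyzer.inference.edge_cases import detect_edge_cases
from analyzer.testing.tests import map_tests
from analyzer.signals.signals import generate_signals
from analyzer.signals.aggregate import aggregate_signals

files = discover_files("path/to/repo")
functions = []
for meta in files:
    with open(meta["path"], encoding="utf-8", errors="ignore") as f:
        functions.extend(extract_functions(meta["path"], f.read()))

edge_cases = detect_edge_cases(functions)
tests = map_tests(files)
signals = aggregate_signals(generate_signals(functions, edge_cases, tests))
print(f"{len(files)} files, {len(functions)} functions, {len(signals)} signals")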

coderecon-0.1.0/analyzer/__init__.py
File without changes

coderecon-0.1.0/analyzer/diff.py
ADDED
@@ -0,0 +1,44 @@
+import json
+from pathlib import Path
+
+
+def run_diff_logic():
+    # Use the same root where analysis.json lives
+    current_path = Path("analysis.json")
+    previous_path = Path("analysis_previous.json")
+
+    if not previous_path.exists():
+        return "[coderecon] Diff Refused: No previous analysis found. Run 'scan' twice to compare changes."
+
+    with open(current_path, "r", encoding="utf-8") as f:
+        curr = json.load(f)
+    with open(previous_path, "r", encoding="utf-8") as f:
+        prev = json.load(f)
+
+    # Use Path + Type + Message as the unique signature for a signal
+    def get_sig(s):
+        return (s.get("path") or s.get("file"), s.get("type"), s.get("message"))
+
+    curr_set = {get_sig(s) for s in curr.get("signals", [])}
+    prev_set = {get_sig(s) for s in prev.get("signals", [])}
+
+    added = curr_set - prev_set
+    removed = prev_set - curr_set
+
+    # Formatting Output
+    out = ["# 🔄 RECON DELTA REPORT\n"]
+
+    if added:
+        out.append(f"## 🛑 New Hazards Detected (+{len(added)})")
+        for path, stype, msg in added:
+            out.append(f"- [{stype.upper()}] in {path}: {msg}")
+
+    if removed:
+        out.append(f"\n## ✅ Risks Resolved (-{len(removed)})")
+        for path, stype, msg in removed:
+            out.append(f"- Resolved: {stype} in {path}")
+
+    if not added and not removed:
+        out.append("No changes in signal density detected.")
+
+    return "\n".join(out)
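
run_diff_logic compares the signal sets of two saved scans by a (path, type, message) signature; note that the aggregated signals scan.py writes carry no "message" field, so that component is always None in practice. A minimal sketch of exercising it, assuming the previous analysis.json is rotated to analysis_previous.json by the caller (presumably the CLI in cli.py, which is not expanded in this diff):

# Hypothetical sketch: snapshot the old analysis, rescan, then diff the signals.
import shutil
from analyzer.diff import run_diff_logic
from analyzer.scan import run_analysis

shutil.copy("analysis.json", "analysis_previous.json")  # keep the previous snapshot
run_analysis("path/to/repo")                            # refresh analysis.json
print(run_diff_logic())                                 # Markdown-style delta report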

coderecon-0.1.0/analyzer/discovery/__init__.py
File without changes

coderecon-0.1.0/analyzer/discovery/files.py
ADDED
@@ -0,0 +1,46 @@
+import os
+from pathlib import Path
+
+# Move these to a set for O(1) lookup speed
+SUPPORTED_EXTENSIONS = {".py", ".js", ".ts", ".tsx", ".rs", ".go", ".java", ".cpp", ".c", ".h"}
+EXCLUDE_DIRS = {".git", "node_modules", "__pycache__", ".venv", "dist", "build", ".coderecon", "venv", "lib"}
+
+
+def _get_file_metadata(full_path, filename):
+    """Fast metadata assembly using pre-calculated values."""
+    return {
+        "path": full_path,
+        "name": filename,
+        "size": os.path.getsize(full_path)  # Only hit the disk if necessary
+    }
+
+
+def discover_files(root_path: str):
+    """
+    High-speed discovery using os.scandir to minimize system calls.
+    """
+    # 1. Handle Single File Input Fast
+    if os.path.isfile(root_path):
+        if os.path.splitext(root_path)[1].lower() in SUPPORTED_EXTENSIONS:
+            return [_get_file_metadata(root_path, os.path.basename(root_path))]
+        return []
+
+    files = []
+    stack = [root_path]
+
+    # 2. Manual Stack Walk (often faster than os.walk for deep trees)
+    while stack:
+        current_dir = stack.pop()
+        try:
+            with os.scandir(current_dir) as it:
+                for entry in it:
+                    if entry.is_dir():
+                        if entry.name not in EXCLUDE_DIRS:
+                            stack.append(entry.path)
+                    elif entry.is_file():
+                        if os.path.splitext(entry.name)[1].lower() in SUPPORTED_EXTENSIONS:
+                            files.append(_get_file_metadata(entry.path, entry.name))
+        except PermissionError:
+            continue
+
+    return files
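
discover_files walks the tree with an explicit stack over os.scandir, skipping EXCLUDE_DIRS and keeping only SUPPORTED_EXTENSIONS; each hit comes back as the small metadata dict built by _get_file_metadata. A minimal sketch:

# Hypothetical sketch: list the source files coderecon would consider.
from analyzer.discovery.files import discover_files

for meta in discover_files("."):
    print(f'{meta["path"]} ({meta["size"]} bytes)')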

coderecon-0.1.0/analyzer/discovery/languages.py
File without changes

coderecon-0.1.0/analyzer/inference/__init__.py
File without changes

coderecon-0.1.0/analyzer/inference/edge_cases.py
ADDED
@@ -0,0 +1,92 @@
+import ast
+
+
+def _emit(fn, inner, rule_id, case, reason, severity="low"):
+    return {
+        "rule_id": rule_id,
+        "function": fn["name"],
+        "file": fn["path"],
+        "case": case,
+        "reason": reason,
+        "severity": severity,
+        "line": getattr(inner, "lineno", None),
+        "node_type": type(inner).__name__,
+    }
+
+
+class DepthVisitor(ast.NodeVisitor):
+    """Tracks nesting depth of control flow nodes."""
+
+    def __init__(self):
+        self.current_depth = 0
+        self.max_depth = 0
+        self.nesting_nodes = (ast.If, ast.For, ast.While, ast.Try, ast.With)
+
+    def visit_nested(self, node):
+        self.current_depth += 1
+        self.max_depth = max(self.max_depth, self.current_depth)
+        self.generic_visit(node)
+        self.current_depth -= 1
+
+    def visit_If(self, node): self.visit_nested(node)
+
+    def visit_For(self, node): self.visit_nested(node)
+
+    def visit_While(self, node): self.visit_nested(node)
+
+    def visit_Try(self, node): self.visit_nested(node)
+
+
+def detect_edge_cases(functions):
+    edge_cases = []
+
+    for fn in functions:
+        try:
+            with open(fn["path"], "r", encoding="utf-8", errors="ignore") as f:
+                tree = ast.parse(f.read())
+        except Exception:
+            continue
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.FunctionDef) and node.name == fn["name"]:
+
+                # 1. Check Deep Nesting
+                dv = DepthVisitor()
+                dv.visit(node)
+                if dv.max_depth > 3:
+                    edge_cases.append(_emit(
+                        fn, node, "CR1001", "Deep Nesting",
+                        f"Logic nested {dv.max_depth} levels deep. High cognitive load.", "high"
+                    ))
+
+                # 2. Specific Logic Hazards
+                for inner in ast.walk(node):
+                    if isinstance(inner, (ast.For, ast.While)):
+                        edge_cases.append(_emit(
+                            fn, inner, "CR2001", "Loop execution",
+                            "Potential for infinite loops or O(n) performance hits.", "medium"
+                        ))
+
+                    if isinstance(inner, ast.Try):
+                        # Check for 'bare' except or too many handlers
+                        edge_cases.append(_emit(
+                            fn, inner, "CR3001", "Exception path",
+                            "Complexity in error recovery paths.", "low"
+                        ))
+
+                    if isinstance(inner, ast.BinOp) and isinstance(inner.op, ast.Div):
+                        edge_cases.append(_emit(
+                            fn, inner, "CR4001", "Math risk",
+                            "Division operation without visible zero-check.", "medium"
+                        ))
+
+                # 3. Detect "God Functions" (Length-based)
+                if hasattr(node, 'end_lineno'):
+                    length = node.end_lineno - node.lineno
+                    if length > 50:
+                        edge_cases.append(_emit(
+                            fn, node, "CR1002", "Large Function",
+                            f"Function is {length} lines long. Suggest refactoring.", "medium"
+                        ))
+
+    return edge_cases
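
detect_edge_cases re-parses each file and applies its rules only to ast.FunctionDef nodes whose name matches the incoming record, so it expects the dicts produced by extract_functions (async functions are therefore skipped here). A minimal sketch against a single Python file:

# Hypothetical sketch: feed function records straight into the edge-case rules.
from analyzer.parsing.functions import extract_functions
from analyzer.inference.edge_cases import detect_edge_cases

with open("analyzer/scan.py", encoding="utf-8") as f:
    fns = extract_functions("analyzer/scan.py", f.read())

for case in detect_edge_cases(fns):
    print(case["rule_id"], case["case"], f'line {case["line"]}:', case["reason"])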

coderecon-0.1.0/analyzer/parsing/__init__.py
File without changes

coderecon-0.1.0/analyzer/parsing/calls.py
File without changes

coderecon-0.1.0/analyzer/parsing/functions.py
ADDED
@@ -0,0 +1,56 @@
+import re
+from pathlib import Path
+
+# Pre-compiled for speed. Optimized for Rust, TSX, JS, Go, and C-style syntax.
+# Also added a group for Python 'def' as a secondary regex fallback.
+GENERIC_FUNCTION_RE = re.compile(
+    r"(?:async\s+)?(?:fn\s+(\w+)|function\s+(\w+)|(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*=>|const\s+(\w+)\s*:\s*React\.FC|(\w+)\s*\([^)]*\)\s*\{|def\s+(\w+)\s*\()",
+    re.MULTILINE
+)
+
+# Reserved keywords to ignore during regex discovery
+RESERVED = {"if", "for", "while", "switch", "catch", "return", "export", "default"}
+
+
+def extract_functions(file_path: str, content: str):
+    """
+    High-speed extraction: AST for Python, Regex for everything else.
+    """
+    path = Path(file_path)
+    functions = []
+    suffix = path.suffix.lower()
+
+    # 1. PYTHON AST PARSING (Primary for .py)
+    if suffix == ".py":
+        import ast
+        try:
+            tree = ast.parse(content)
+            for node in ast.walk(tree):
+                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                    functions.append({
+                        "name": node.name,
+                        "line": node.lineno,
+                        "path": str(path),
+                        "type": "python_ast",
+                        "length": len(node.body)  # Real logic density
+                    })
+            return functions
+        except SyntaxError:
+            pass  # Fallback to regex if the file is malformed
+
+    # 2. MULTI-LANGUAGE REGEX DISCOVERY (For Rust, JS, TS, Go, and failing Py files)
+    # Using finditer is faster as it doesn't build the whole list at once
+    for match in GENERIC_FUNCTION_RE.finditer(content):
+        # Extract the first non-None group (the function name)
+        func_name = next((g for g in match.groups() if g), None)
+
+        if func_name and func_name not in RESERVED:
+            functions.append({
+                "name": func_name,
+                "line": content.count("\n", 0, match.start()) + 1,
+                "path": str(path),
+                "type": "regex_discovery",
+                "length": 0  # Placeholder for regex-found functions
+            })
+
+    return functions
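
extract_functions has two paths: Python sources go through the ast module, and everything else (or a .py file that fails to parse) falls back to GENERIC_FUNCTION_RE. A minimal sketch showing both:

# Hypothetical sketch: the same helper handles Python (AST) and JS (regex).
from analyzer.parsing.functions import extract_functions

py_src = "def add(a, b):\n    return a + b\n"
js_src = "function add(a, b) { return a + b; }\n"

print(extract_functions("demo.py", py_src))   # type: "python_ast"
print(extract_functions("demo.js", js_src))   # type: "regex_discovery"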

coderecon-0.1.0/analyzer/scan.py
ADDED
@@ -0,0 +1,95 @@
+import json
+import concurrent.futures
+from pathlib import Path
+from multiprocessing import cpu_count
+from tqdm import tqdm
+
+from analyzer.discovery.files import discover_files
+from analyzer.parsing.functions import extract_functions
+from analyzer.signals.aggregate import aggregate_signals
+from analyzer.testing.tests import map_tests
+from analyzer.inference.edge_cases import detect_edge_cases
+from analyzer.signals.signals import generate_signals
+from schemas.analysis import AnalysisSchema
+
+def detect_tech_stack(files):
+    """Detects the tech stack based on marker files."""
+    stack = []
+    filenames = {Path(f['path']).name for f in files}
+    all_paths_str = "".join([f['path'].lower() for f in files])
+
+    if "package.json" in filenames: stack.append("Node.js")
+    if "tsconfig.json" in filenames: stack.append("TypeScript")
+    if "requirements.txt" in filenames or "pyproject.toml" in filenames: stack.append("Python")
+    if "pom.xml" in filenames: stack.append("Java/Maven")
+    if "go.mod" in filenames: stack.append("Go")
+    if "prisma" in all_paths_str: stack.append("Prisma ORM")
+    if "vite.config" in all_paths_str: stack.append("Vite")
+    if "react" in all_paths_str: stack.append("React")
+
+    return stack if stack else ["General Software"]
+
+
+def _parse_file_batch(file_batch):
+    """Processes a chunk of files in one go to reduce process overhead."""
+    from analyzer.parsing.functions import extract_functions
+    results = []
+    for file_info in file_batch:
+        file_path = file_info["path"]
+        try:
+            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
+                content = f.read()
+            results.extend(extract_functions(file_path, content))
+        except Exception:
+            continue
+    return results
+
+
+def run_analysis(path: str) -> dict:
+    from analyzer.discovery.files import discover_files
+    files = discover_files(path)
+    all_functions = []
+
+    # Optimization: Chunking
+    # Spawning processes is expensive; processing in batches is 3x faster for small files.
+    chunk_size = 20
+    chunks = [files[i:i + chunk_size] for i in range(0, len(files), chunk_size)]
+
+    max_workers = max(1, int(cpu_count() * 0.8))
+
+    print(f"[coderecon] Analyzing {len(files)} files using {max_workers} cores...")
+
+    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
+        futures = {executor.submit(_parse_file_batch, chunk): chunk for chunk in chunks}
+
+        for future in tqdm(concurrent.futures.as_completed(futures),
+                           total=len(chunks),
+                           desc="[coderecon] Scanning",
+                           unit="batch",
+                           leave=False):
+            all_functions.extend(future.result())
+
+    # Pipeline logic...
+    tech_stack = detect_tech_stack(files)
+    tests = map_tests(files)
+    edge_cases = detect_edge_cases(all_functions)
+    raw_signals = generate_signals(all_functions, edge_cases, tests)
+    aggregated_signals = aggregate_signals(raw_signals)
+
+    analysis = AnalysisSchema(
+        files=files,
+        functions=all_functions,
+        tests=tests,
+        edge_cases=edge_cases,
+        signals_raw=raw_signals,
+        signals=aggregated_signals,
+        tech_stack=tech_stack
+    )
+
+    analysis_dict = analysis.dict()
+
+    with open("analysis.json", "w", encoding="utf-8") as f:
+        json.dump(analysis_dict, f, indent=4)
+
+    return analysis_dict
+
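
run_analysis is the orchestration point: discovery, batched multiprocess parsing, then the signal pipeline, with the result persisted to analysis.json. Note that it imports tqdm, which does not appear among the Requires-Dist entries shown in PKG-INFO above. A minimal sketch, assuming it is run from a script (ProcessPoolExecutor generally needs the __main__ guard on platforms that spawn workers):

# Hypothetical sketch: run the full deterministic pipeline over a repository.
from analyzer.scan import run_analysis

if __name__ == "__main__":
    result = run_analysis("path/to/repo")
    print(result["tech_stack"])  # keys assume AnalysisSchema field names survive .dict()
    print(f'{len(result["signals"])} aggregated signals written to analysis.json')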

coderecon-0.1.0/analyzer/signals/__init__.py
File without changes

coderecon-0.1.0/analyzer/signals/aggregate.py
ADDED
@@ -0,0 +1,35 @@
+from collections import defaultdict
+
+
+def aggregate_signals(signals):
+    grouped = defaultdict(lambda: {
+        "count": 0,
+        "lines": set()
+    })
+
+    for sig in signals:
+        key = (
+            sig["type"],
+            sig.get("path"),
+            sig.get("function"),
+            sig.get("case")
+        )
+
+        grouped[key]["count"] += 1
+
+        if sig.get("line") is not None:
+            grouped[key]["lines"].add(sig["line"])
+
+    aggregated = []
+
+    for (sig_type, path, function, case), data in grouped.items():
+        aggregated.append({
+            "type": sig_type,
+            "path": path,
+            "function": function,
+            "case": case,
+            "count": data["count"],
+            "lines": sorted(data["lines"]),
+        })
+
+    return aggregated
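
aggregate_signals collapses duplicate raw signals into one record per (type, path, function, case) key and collects their line numbers. A minimal sketch with hand-written input:

# Hypothetical sketch: two raw signals of the same kind collapse into one record.
from analyzer.signals.aggregate import aggregate_signals

raw = [
    {"type": "potential_edge_case", "path": "app.py", "function": "f", "case": "Loop execution", "line": 10},
    {"type": "potential_edge_case", "path": "app.py", "function": "f", "case": "Loop execution", "line": 22},
]
print(aggregate_signals(raw))
# [{'type': 'potential_edge_case', 'path': 'app.py', 'function': 'f',
#   'case': 'Loop execution', 'count': 2, 'lines': [10, 22]}]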

coderecon-0.1.0/analyzer/signals/signals.py
ADDED
@@ -0,0 +1,75 @@
+def assign_severity(signal_type: str) -> str:
+    mapping = {
+        "untested_function": "Medium",
+        "potential_edge_case": "Low",
+        "exception_path": "High",
+        "division_operation": "High",
+        "while_loop": "Medium",
+        "conditional_branch": "Low"
+    }
+
+    return mapping.get(signal_type, "Low")
+
+def generate_signals(functions, edge_cases, tests):
+    """Convert raw findings into structured signals with safety fallbacks."""
+
+    signals = []
+
+    tested_functions = set()
+    for test in tests:
+        # Safely handle potential missing references key
+        tested_functions.update(test.get("references", []))
+
+    # Untested functions
+    for fn in functions:
+        # Use .get() to prevent KeyError if 'length' or 'line_start' is missing
+        length = fn.get("length", 0)
+        line_start = fn.get("line_start", fn.get("line", 0))  # Fallback to 'line' for regex matches
+        fn_name = fn.get("name", "unknown")
+        fn_path = fn.get("path", "unknown")
+
+        # Threshold checks with safe length
+        if length > 100:
+            signals.append({
+                "type": "large_function",
+                "function": fn_name,
+                "path": fn_path,
+                "line": line_start,
+                "length": length,
+                "severity": "High"
+            })
+        elif length > 60:
+            signals.append({
+                "type": "large_function",
+                "function": fn_name,
+                "path": fn_path,
+                "line": line_start,
+                "length": length,
+                "severity": "Medium"
+            })
+
+        if fn_name not in tested_functions:
+            signal_type = "untested_function"
+            signals.append({
+                "type": "untested_function",
+                "function": fn_name,
+                "path": fn_path,
+                "line": line_start,
+                "severity": assign_severity(signal_type),
+            })
+
+    # Potential edge cases
+    for ec in edge_cases:
+        signal_type = "potential_edge_case"
+        signals.append({
+            "type": "potential_edge_case",
+            "function": ec.get("function"),
+            "path": ec.get("file"),
+            "line": ec.get("line"),
+            "case": ec.get("case"),
+            "rule_id": ec.get("rule_id"),
+            "node_type": ec.get("node_type"),
+            "severity": assign_severity(signal_type),
+        })
+
+    return signals
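
generate_signals cross-references each discovered function against the names called from test functions and folds in the edge cases, with severities taken from assign_severity. A minimal sketch with hand-written inputs:

# Hypothetical sketch: a function with no referencing test yields an
# "untested_function" signal with Medium severity.
from analyzer.signals.signals import generate_signals

functions = [{"name": "pay", "path": "billing.py", "line": 3, "length": 12}]
tests = [{"test_name": "test_refund", "file": "test_billing.py", "references": ["refund"]}]
print(generate_signals(functions, [], tests))
# [{'type': 'untested_function', 'function': 'pay', 'path': 'billing.py',
#   'line': 3, 'severity': 'Medium'}]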

coderecon-0.1.0/analyzer/testing/__init__.py
File without changes

coderecon-0.1.0/analyzer/testing/tests.py
ADDED
@@ -0,0 +1,39 @@
+from pathlib import Path
+import ast
+
+def map_tests(files):
+    """
+    Discover test functions and infer which production functions they reference.
+    """
+    tests = []
+
+    for file in files:
+        path = Path(file["path"])
+
+        if not (
+            path.name.startswith("test_") or "test" in path.parts
+        ):
+            continue
+
+        try:
+            source = open(path, "r", encoding="utf-8", errors="ignore").read()
+            tree = ast.parse(source)
+        except Exception:
+            continue
+
+        for node in ast.walk(tree):
+            if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"):
+                referenced = set()
+
+                for inner in ast.walk(node):
+                    if isinstance(inner, ast.Call):
+                        if isinstance(inner.func, ast.Name):
+                            referenced.add(inner.func.id)
+
+                tests.append({
+                    "test_name": node.name,
+                    "file": str(path),
+                    "references": sorted(referenced)
+                })
+
+    return tests
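
map_tests only considers files whose name starts with test_ or that sit under a directory literally named test (a directory named "tests" would not match path.parts), and it records the plain-name calls made inside each test_ function. A minimal sketch:

# Hypothetical sketch: map which production functions each test touches.
from analyzer.discovery.files import discover_files
from analyzer.testing.tests import map_tests

for t in map_tests(discover_files("path/to/repo")):
    print(t["test_name"], "->", ", ".join(t["references"]))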