devarch 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devarch/__init__.py +4 -0
- devarch/__main__.py +4 -0
- devarch/analyzers/__init__.py +2 -0
- devarch/analyzers/ancient.py +48 -0
- devarch/analyzers/dead_code.py +92 -0
- devarch/analyzers/duplicates.py +101 -0
- devarch/analyzers/health.py +60 -0
- devarch/analyzers/maintenance.py +902 -0
- devarch/analyzers/monsters.py +62 -0
- devarch/analyzers/recovery.py +338 -0
- devarch/analyzers/ruins.py +45 -0
- devarch/analyzers/suspicious.py +39 -0
- devarch/analyzers/todos.py +60 -0
- devarch/cli/__init__.py +2 -0
- devarch/cli/main.py +1708 -0
- devarch/models.py +43 -0
- devarch/plugins.py +29 -0
- devarch/reports/__init__.py +2 -0
- devarch/reports/exporters.py +274 -0
- devarch/scanner/__init__.py +2 -0
- devarch/scanner/core.py +15 -0
- devarch/scanner/discovery.py +84 -0
- devarch/scanner/intelligence.py +1559 -0
- devarch/utils/__init__.py +2 -0
- devarch/utils/fs.py +165 -0
- devarch/utils/git_info.py +64 -0
- devarch/utils/rich_ui.py +107 -0
- devarch/version.py +3 -0
- devarch-0.2.0.dist-info/METADATA +317 -0
- devarch-0.2.0.dist-info/RECORD +33 -0
- devarch-0.2.0.dist-info/WHEEL +4 -0
- devarch-0.2.0.dist-info/entry_points.txt +3 -0
- devarch-0.2.0.dist-info/licenses/LICENSE +22 -0
devarch/__init__.py
ADDED
devarch/__main__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ..models import Artifact
|
|
8
|
+
from ..utils.fs import path_kind, safe_stat
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(slots=True)
|
|
12
|
+
class AncientStats:
|
|
13
|
+
total: int
|
|
14
|
+
unreferenced: int
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def file_age_days(path: Path) -> int:
    """Return how many whole days have passed since *path* was modified.

    The modification time comes from ``path.stat()`` and is compared with the
    current time, both expressed as timezone-aware UTC datetimes. A
    modification time that lies in the future yields ``0`` instead of a
    negative age. Any ``OSError`` raised by ``stat()`` propagates.
    """
    last_modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
    elapsed = datetime.now(timezone.utc) - last_modified
    whole_days = elapsed.days
    return whole_days if whole_days > 0 else 0
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def find_ancient_files(
    files: list[Path],
    references: dict[Path, set[Path]],
    threshold_days: int = 365,
) -> list[Artifact]:
    """Collect an ``ancient_file`` artifact for every sufficiently old file.

    Files whose ``path_kind`` is ``"binary"`` are ignored. A file counts as
    referenced when ``references`` holds a non-empty entry for it. A file is
    reported when its age reaches ``threshold_days``, or when it is at least
    180 days old and unreferenced.

    Risk is ``"High"`` for unreferenced files and for files at least 730 days
    old, otherwise ``"Medium"``. Unreferenced findings carry confidence 0.84,
    referenced ones 0.7.

    Args:
        files: Candidate paths to inspect.
        references: Mapping from a path to the set of paths recorded for it.
        threshold_days: Age in days at which any file is reported.

    Returns:
        The artifacts, in the order the matching files appear in ``files``.
    """
    found: list[Artifact] = []
    for candidate in files:
        if path_kind(candidate) == "binary":
            continue

        days_old = file_age_days(candidate)
        is_referenced = bool(references.get(candidate))

        past_threshold = days_old >= threshold_days
        stale_orphan = days_old >= 180 and not is_referenced
        if not (past_threshold or stale_orphan):
            continue

        if is_referenced:
            status = "Referenced"
            confidence = 0.7
            risk = "High" if days_old >= 730 else "Medium"
        else:
            status = "Unreferenced"
            confidence = 0.84
            risk = "High"

        found.append(
            Artifact(
                path=candidate,
                kind="ancient_file",
                risk=risk,
                age_days=days_old,
                # NOTE(review): assumes safe_stat yields the file size - confirm in utils.fs.
                size_bytes=safe_stat(candidate),
                detail=status,
                confidence=confidence,
            )
        )
    return found
|
|
48
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from ..models import Artifact
|
|
8
|
+
from ..utils.fs import path_kind, read_text
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Matches an import statement at the start of a line: group 1 holds the module
# of ``from X import ...`` and group 2 the module of ``import X``.
PY_IMPORT_RE = re.compile(
    r"^\s*(?:from\s+([\w.]+)\s+import|import\s+([\w.]+))",
    re.MULTILINE,
)

# File suffixes that the dead-code scan treats as source code.
CODE_EXTENSIONS = {
    ".py",
    ".pyi",
    ".js",
    ".jsx",
    ".ts",
    ".tsx",
    ".mjs",
    ".cjs",
}
|
|
13
|
+
def _module_name(path: Path, root: Path) -> str:
    """Return the dotted name of *path* relative to *root*.

    The final suffix is dropped and the remaining path components are joined
    with ``"."``. ``Path.relative_to`` raises ``ValueError`` when *path* does
    not live under *root*.
    """
    relative = path.relative_to(root)
    without_suffix = relative.with_suffix("")
    return ".".join(without_suffix.parts)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _has_unreachable_code(content: str) -> bool:
    """Report whether any function in *content* has statements that cannot run.

    The source is parsed with ``ast``. Inside every (async) function, each
    statement list - the function body itself as well as the ``body``,
    ``orelse`` and ``finalbody`` of nested compound statements - is checked
    for a ``return``, ``raise``, ``break`` or ``continue`` that is followed
    by at least one more statement in the same list.

    Args:
        content: Python source text.

    Returns:
        ``True`` when such a trailing statement exists, ``False`` otherwise
        or when the source cannot be parsed.
    """
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError covers sources with NUL bytes on interpreters that do not
        # report them as SyntaxError; unparsable input is simply not flagged.
        return False

    terminal_types = (ast.Return, ast.Raise, ast.Break, ast.Continue)
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    block_fields = ("body", "orelse", "finalbody")

    for function in ast.walk(tree):
        if not isinstance(function, function_types):
            continue
        # Walk the function's own subtree so blocks nested in if/for/while/
        # try/with/match are inspected too, not just the top-level body.
        for node in ast.walk(function):
            for field in block_fields:
                statements = getattr(node, field, None)
                # Lambda and IfExp expose ``body``/``orelse`` as single
                # expressions, not statement lists.
                if not isinstance(statements, list):
                    continue
                # A terminal statement anywhere but last leaves dead code
                # behind it.
                if any(isinstance(stmt, terminal_types) for stmt in statements[:-1]):
                    return True
    return False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _is_imported(module_path: str, imported_modules: set[str]) -> bool:
    """Return True when *module_path* lines up with any imported module path.

    Both sides are slash-separated. One must be a suffix of the other on a
    whole-segment boundary, so ``pkg/core`` is not matched by an import of
    ``re`` even though the raw strings share a suffix.
    """
    anchored = "/" + module_path
    for name in imported_modules:
        candidate = "/" + name
        if anchored.endswith(candidate) or candidate.endswith(anchored):
            return True
    return False


def find_dead_code(root: Path, files: list[Path], text_cache: dict[Path, str]) -> list[Artifact]:
    """Flag source files that nothing appears to import, plus unreachable code.

    Import targets are collected from every text file with ``PY_IMPORT_RE``
    and compared, as slash-separated paths, with each file's module path
    relative to *root*.

    * ``.py`` files (excluding tests, ``conftest.py``, ``__init__.py`` and
      ``__main__.py``) that are not imported and have no
      ``if __name__ == "__main__"`` guard yield a ``dead_code_candidate``
      with Medium risk. They are also checked for unreachable statements.
    * Other files whose suffix is in ``CODE_EXTENSIONS`` yield a Low risk
      ``dead_code_candidate`` when not imported.

    Args:
        root: Directory that module paths are computed against.
        files: Paths to analyse; only those of kind ``"text"`` are used.
        text_cache: Mapping from path to file content; missing entries are
            treated as empty text.

    Returns:
        The artifacts in the order the files were visited.
    """
    artifacts: list[Artifact] = []
    text_files = [path for path in files if path_kind(path) == "text"]

    imported_modules: set[str] = set()
    for path in text_files:
        for match in PY_IMPORT_RE.finditer(text_cache.get(path, "")):
            target = match.group(1) or match.group(2)
            if not target:
                continue
            normalized = target.lstrip(".").replace(".", "/")
            # ``from . import x`` captures only dots and normalizes to "".
            # An empty name is a suffix of every string, so keeping it would
            # mark every module in the project as imported.
            if normalized:
                imported_modules.add(normalized)

    for path in text_files:
        content = text_cache.get(path, "")
        module_path = _module_name(path, root).replace(".", "/")

        if path.suffix.lower() == ".py":
            # Tests and package entry files are loaded by tooling, not imports.
            if "tests" in path.parts or path.name.startswith("test_") or path.name == "conftest.py":
                continue
            if path.name in {"__init__.py", "__main__.py"}:
                continue

            has_main_guard = (
                "if __name__ == \"__main__\"" in content
                or "if __name__ == '__main__'" in content
            )
            if not has_main_guard and not _is_imported(module_path, imported_modules):
                artifacts.append(
                    Artifact(
                        path=path,
                        kind="dead_code_candidate",
                        risk="Medium",
                        detail="Module is not referenced by obvious imports",
                        confidence=0.62,
                    )
                )

            if _has_unreachable_code(content):
                artifacts.append(
                    Artifact(
                        path=path,
                        kind="unreachable_code",
                        risk="Low",
                        detail="Function body contains statements after a terminal statement",
                        confidence=0.72,
                    )
                )
            continue

        if path.suffix.lower() not in CODE_EXTENSIONS:
            continue
        if not _is_imported(module_path, imported_modules):
            artifacts.append(
                Artifact(
                    path=path,
                    kind="dead_code_candidate",
                    risk="Low",
                    detail="File is not referenced by obvious imports",
                    confidence=0.5,
                )
            )

    return artifacts
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
from ..models import Artifact
|
|
9
|
+
from ..utils.fs import path_kind, read_text
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def normalize_text(text: str, *, preserve_lines: bool = False) -> str:
    """Strip comments and noise from *text* so similar code compares equal.

    Triple-quoted strings, ``#`` and ``//`` line comments and ``/* */`` block
    comments are removed, every standalone integer becomes ``0``, and the
    result is stripped and lower-cased.

    Args:
        text: Raw file content.
        preserve_lines: When false (the default) every run of whitespace,
            including line breaks, collapses to one space, so the result is
            a single line. When true, whitespace is collapsed within each
            line only, blank lines are dropped and the remaining lines stay
            separated by ``"\n"``.

    Returns:
        The normalized text.
    """
    text = re.sub(r"\"\"\".*?\"\"\"|'''.*?'''", "", text, flags=re.S)
    text = re.sub(r"#.*$", "", text, flags=re.M)
    text = re.sub(r"//.*$", "", text, flags=re.M)
    text = re.sub(r"/\*.*?\*/", "", text, flags=re.S)
    if preserve_lines:
        text = "\n".join(
            " ".join(line.split()) for line in text.splitlines() if line.strip()
        )
    else:
        text = re.sub(r"\s+", " ", text)
    text = re.sub(r"\b\d+\b", "0", text)
    return text.strip().lower()


def tokenize_blocks(text: str, block_size: int = 12) -> list[str]:
    """Split *text* into overlapping windows of ``block_size`` non-blank lines.

    Lines are stripped and blank lines discarded first. Text with at most
    ``block_size`` lines yields a single block (or none when empty);
    otherwise one block is produced per starting line.
    """
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if len(lines) <= block_size:
        return ["\n".join(lines)] if lines else []
    return [
        "\n".join(lines[start : start + block_size])
        for start in range(len(lines) - block_size + 1)
    ]


def find_duplicates(files: list[Path], text_cache: dict[Path, str]) -> list[Artifact]:
    """Report pairs of text files that share a normalized block of lines.

    Each text file is normalized with line breaks preserved and cut into
    sliding windows by ``tokenize_blocks``. Collapsing the line breaks first
    would leave a single line, reduce every file to one whole-file block and
    disable the window comparison. Windows with fewer than 10
    whitespace-separated tokens are ignored.

    Args:
        files: Paths to analyse; only those of kind ``"text"`` are used.
        text_cache: Mapping from path to file content; missing entries are
            treated as empty text.

    Returns:
        One ``duplicate_block`` artifact per pair of distinct files that
        share at least one block, in order of first discovery.
    """
    # Block text -> files containing it. The inner dict acts as an ordered
    # set, so a block repeated within one file is recorded once and can never
    # pair a file with itself.
    signatures: dict[str, dict[Path, None]] = defaultdict(dict)
    for path in files:
        if path_kind(path) != "text":
            continue
        content = normalize_text(text_cache.get(path, ""), preserve_lines=True)
        if not content:
            continue
        for block in tokenize_blocks(content):
            if len(block.split()) < 10:
                continue
            signatures[block][path] = None

    artifacts: list[Artifact] = []
    # Overlapping windows make two similar files share many blocks; report
    # each file pair once instead of once per shared window.
    seen: set[tuple[Path, Path]] = set()
    for owners in signatures.values():
        if len(owners) < 2:
            continue
        paths = list(owners)
        for idx, left in enumerate(paths):
            for right in paths[idx + 1 :]:
                first, second = sorted((left, right))
                pair = (first, second)
                if pair in seen:
                    continue
                seen.add(pair)
                artifacts.append(
                    Artifact(
                        path=left,
                        kind="duplicate_block",
                        risk="Medium",
                        detail=f"Similar to {right}",
                        score=0.85,
                        confidence=0.85,
                        metadata={"match_path": str(right)},
                    )
                )
    return artifacts
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def similarity_report(files: list[Path], text_cache: dict[Path, str]) -> list[dict[str, object]]:
    """List pairs of text files whose identifier sets overlap heavily.

    Every text file is normalized and reduced to the set of identifier-like
    tokens of two or more characters. For each pair of files the Jaccard
    index of the two sets is expressed as a percentage rounded to one
    decimal; pairs scoring at least 65 are reported.

    Args:
        files: Paths to analyse; only those of kind ``"text"`` are used.
        text_cache: Mapping from path to file content; missing entries are
            treated as empty text.

    Returns:
        Dicts with ``"left"``, ``"right"`` and ``"similarity"`` keys, sorted
        by similarity from highest to lowest.
    """
    token_sets: dict[Path, set[str]] = {}
    for candidate in files:
        if path_kind(candidate) != "text":
            continue
        cleaned = normalize_text(text_cache.get(candidate, ""))
        words = set(re.findall(r"[a-zA-Z_][a-zA-Z0-9_]+", cleaned))
        # Files without any token are left out, so every stored set is
        # non-empty and every union below is non-zero.
        if words:
            token_sets[candidate] = words

    ordered = list(token_sets)
    matches: list[dict[str, object]] = []
    for position, left in enumerate(ordered):
        left_words = token_sets[left]
        for right in ordered[position + 1 :]:
            right_words = token_sets[right]
            shared = len(left_words & right_words)
            combined = len(left_words | right_words)
            percent = round((shared / combined) * 100, 1)
            if percent < 65:
                continue
            matches.append(
                {
                    "left": str(left),
                    "right": str(right),
                    "similarity": percent,
                }
            )

    matches.sort(key=lambda entry: entry["similarity"], reverse=True)
    return matches
|
|
101
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass(slots=True)
|
|
7
|
+
class HealthMetrics:
|
|
8
|
+
score: int
|
|
9
|
+
status: str
|
|
10
|
+
warnings: list[str]
|
|
11
|
+
debt_estimate: float
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def calculate_health(
    *,
    total_files: int,
    dead_code_count: int,
    duplicate_count: int,
    ancient_count: int,
    todo_count: int,
    monster_count: int,
    ruin_count: int,
    suspicious_count: int,
) -> HealthMetrics:
    """Turn finding counts into a 0-100 health score with a status label.

    Each count is multiplied by a fixed weight and the products are summed
    into a debt estimate. A non-zero ``total_files`` adds ``total_files /
    250`` on top, capped at 10. The score is ``100 - debt`` rounded and
    clamped to 0-100.

    Status is ``"Healthy"`` from 85, ``"Moderate debt"`` from 65, ``"Debt
    detected"`` from 45 and ``"Critical"`` below that. A warning is added for
    every non-zero count except ``todo_count``, which only affects the debt.

    Returns:
        A ``HealthMetrics`` holding the score, status, warnings and the
        unclamped debt estimate.
    """
    # Accumulate in a fixed order so the floating-point total is stable.
    debt = dead_code_count * 2.0
    debt += duplicate_count * 1.5
    debt += ancient_count * 1.2
    debt += todo_count * 0.35
    debt += monster_count * 2.5
    debt += ruin_count * 0.8
    debt += suspicious_count * 0.6
    if total_files:
        size_penalty = total_files / 250.0
        debt += size_penalty if size_penalty < 10.0 else 10.0

    score = int(round(100 - debt))
    score = min(score, 100)
    score = max(score, 0)

    for floor, label in ((85, "Healthy"), (65, "Moderate debt"), (45, "Debt detected")):
        if score >= floor:
            status = label
            break
    else:
        status = "Critical"

    flagged = (
        (dead_code_count, "Dead code candidates detected"),
        (duplicate_count, "Duplicate implementations found"),
        (ancient_count, "Ancient files appear abandoned"),
        (monster_count, "Monster files need review"),
        (ruin_count, "Empty structures or unused assets found"),
        (suspicious_count, "Suspicious filenames found"),
    )
    warnings: list[str] = [message for count, message in flagged if count]

    return HealthMetrics(score=score, status=status, warnings=warnings, debt_estimate=debt)
|
|
60
|
+
|