fossil-code 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fossil/__init__.py +3 -0
- fossil/__main__.py +4 -0
- fossil/analyzers.py +221 -0
- fossil/cache.py +228 -0
- fossil/cli.py +421 -0
- fossil/config_manager.py +141 -0
- fossil/engine.py +122 -0
- fossil/git_miner.py +78 -0
- fossil/models.py +109 -0
- fossil/patterns.py +79 -0
- fossil/py.typed +1 -0
- fossil/render.py +436 -0
- fossil/repo.py +82 -0
- fossil/scoring.py +126 -0
- fossil_code-0.2.0.dist-info/METADATA +377 -0
- fossil_code-0.2.0.dist-info/RECORD +20 -0
- fossil_code-0.2.0.dist-info/WHEEL +5 -0
- fossil_code-0.2.0.dist-info/entry_points.txt +2 -0
- fossil_code-0.2.0.dist-info/licenses/LICENSE +21 -0
- fossil_code-0.2.0.dist-info/top_level.txt +1 -0
fossil/git_miner.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from fossil.models import CommitInfo, GitHistoryResult
|
|
8
|
+
from fossil.repo import git_head, is_shallow, is_tracked, relpath, remote_url, run_git
|
|
9
|
+
|
|
10
|
+
# Extracts a pull-request / issue number from a commit subject.  Accepted
# spellings: "#123", "PR 123", "PR #123", "pull request #123" (case-insensitive).
# The word boundaries stop the "PR" alternative from firing inside ordinary
# words, e.g. "expr 12" must not be read as PR 12.
PR_RE = re.compile(r"(?:#|\bPR\s*#?|\bpull request\s*#?)(\d+)", re.IGNORECASE)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def parse_commit(raw: str) -> CommitInfo:
    """Build a ``CommitInfo`` from one ``\\x1f``-separated ``git log`` record.

    The record must contain five fields in this order: full hash, commit
    timestamp (epoch seconds), author name, author email, subject.  Only the
    first four separators are split on, so the subject keeps any ``\\x1f`` it
    may contain.

    Raises:
        ValueError: if the record has fewer than five fields or the timestamp
            is not an integer.
    """
    fields = raw.split("\x1f", 4)
    commit_hash, timestamp, author_name, author_email, subject = fields

    # Render the timestamp as UTC ISO-8601 with a trailing "Z" instead of "+00:00".
    committed_at = datetime.fromtimestamp(int(timestamp), UTC)
    iso_date = committed_at.isoformat().replace("+00:00", "Z")

    pr_number = None
    found = PR_RE.search(subject)
    if found:
        pr_number = int(found.group(1))

    return CommitInfo(
        hash=commit_hash,
        short_hash=commit_hash[:7],
        date=iso_date,
        author_name=author_name,
        author_email=author_email,
        message=subject,
        pr_number=pr_number,
    )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def mine_history(
    path: Path, repo_root: Path, depth: int, reference_terms: set[str]
) -> GitHistoryResult:
    """Collect git-history facts about ``path`` inside ``repo_root``.

    Steps:
      1. Record HEAD, tracked/shallow state and the remote URL.  A shallow
         clone adds a warning; an untracked file returns immediately.
      2. Run ``git log --follow`` on the file (at most ``depth`` commits).  The
         first listed commit becomes ``last_modified`` and the last listed one
         ``original_author``.
      3. Run ``git log --all -G <terms>`` over the repository.  The first listed
         commit whose hash differs from ``last_modified`` becomes
         ``death_commit``.  If there are no usable terms or no such commit,
         ``ambiguous_death`` is set instead.

    Args:
        path: File to investigate.
        repo_root: Repository root; git commands are run against it.
        depth: Value passed to ``--max-count`` for both log queries.
        reference_terms: Strings searched for in patch text; empty strings are
            ignored.

    Returns:
        The populated ``GitHistoryResult``.
    """
    rel = relpath(path, repo_root)
    result = GitHistoryResult(
        head=git_head(repo_root),
        tracked=is_tracked(path, repo_root),
        shallow=is_shallow(repo_root),
        remote_url=remote_url(repo_root),
    )
    if result.shallow:
        result.warnings.append("Shallow git clone detected. History may be incomplete.")
    if not result.tracked:
        return result

    fmt = "%H%x1f%ct%x1f%an%x1f%ae%x1f%s"
    limit_arg = f"--max-count={depth}"
    format_arg = f"--format={fmt}"

    def _logged_commits(args):
        # Run one `git log` query and parse every non-blank output row.
        output = run_git(repo_root, args, check=False).stdout
        return [parse_commit(row) for row in output.splitlines() if row.strip()]

    history = _logged_commits(["log", limit_arg, format_arg, "--follow", "--", rel])
    if history:
        result.last_modified, result.original_author = history[0], history[-1]
    # Conservative: a history of exactly `depth` commits is indistinguishable
    # from a truncated one here, so it is reported as truncated too.
    if len(history) >= depth:
        result.warnings.append(
            f"History truncated at {depth} commits. Increase --depth for deeper traversal."
        )

    # Longest terms first in the alternation; empty strings are dropped.
    terms = sorted(filter(None, reference_terms), key=len, reverse=True)
    if not terms:
        result.ambiguous_death = True
        return result

    grep = "|".join(map(re.escape, terms))
    mentions = _logged_commits(
        ["log", "--all", limit_arg, format_arg, "-G", grep, "--", "."]
    )

    # The file's own latest commit is not accepted as the death commit.
    excluded_hash = result.last_modified.hash if result.last_modified else ""
    death = next((item for item in mentions if item.hash != excluded_hash), None)
    if death is None:
        result.ambiguous_death = True
    else:
        result.death_commit = death
    return result
|
fossil/models.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict, dataclass, field
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Reference:
    """One located reference: a position in a file plus a category and its text."""

    # File in which the reference was found.
    path: str
    # Line number of the occurrence (presumably 1-based; confirm with the producer).
    line: int
    # Category label chosen by whichever analyzer produced the reference.
    kind: str
    # Text associated with the occurrence.
    text: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class StaticAnalysisResult:
    """Counters and reference lists produced by static analysis of one target.

    Only ``language`` is required; every counter defaults to ``0`` and every
    list to a fresh empty list.
    """

    language: str
    # Reference counters, grouped by where the reference was found.
    call_sites: int = 0
    import_references: int = 0
    dynamic_references: list[Reference] = field(default_factory=list)
    reflection_patterns: list[Reference] = field(default_factory=list)
    test_file_references: int = 0
    documentation_references: int = 0
    config_file_references: int = 0
    references: list[Reference] = field(default_factory=list)
    unknown_language: bool = False

    @property
    def main_code_references(self) -> int:
        """Return ``call_sites + import_references``, clamped so it is never negative."""
        total = self.call_sites + self.import_references
        return total if total > 0 else 0
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class CommitInfo:
|
|
35
|
+
hash: str
|
|
36
|
+
short_hash: str
|
|
37
|
+
date: str
|
|
38
|
+
author_name: str
|
|
39
|
+
author_email: str
|
|
40
|
+
message: str
|
|
41
|
+
pr_number: int | None = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class GitHistoryResult:
    """Git-derived facts about one analysed file.

    ``head`` and ``tracked`` are required; all other fields start out
    empty/false and are filled in as history is examined.
    """

    head: str
    tracked: bool
    shallow: bool = False
    remote_url: str | None = None
    # Commits of interest; each stays ``None`` until one is identified.
    death_commit: CommitInfo | None = None
    original_author: CommitInfo | None = None
    last_modified: CommitInfo | None = None
    # Set when no death commit could be determined.
    ambiguous_death: bool = False
    # Human-readable warnings collected along the way.
    warnings: list[str] = field(default_factory=list)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
|
|
58
|
+
class HoldPattern:
|
|
59
|
+
text: str
|
|
60
|
+
line: int
|
|
61
|
+
condition: str | None
|
|
62
|
+
condition_type: str
|
|
63
|
+
condition_met: bool | None
|
|
64
|
+
evidence: str
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass
class PatternResult:
    """Outcome of scanning a file for hold patterns."""

    # Whether any hold pattern was found.
    detected: bool
    # The individual matches; a fresh empty list by default.
    patterns: list[HoldPattern] = field(default_factory=list)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class ConfidenceSignal:
    """A single named, weighted input to the confidence score."""

    name: str
    # Integer weight attached to the signal.
    weight: int
    # Whether the signal was applied, and the stated reason.
    applied: bool
    reason: str
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass
class ConfidenceResult:
    """Overall confidence verdict together with the signals behind it."""

    # Integer score with its textual label and risk rating.
    score: int
    label: str
    risk: str
    # The signals that were considered for the score.
    signals: list[ConfidenceSignal]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class ForensicResult:
    """Complete report for one analysed target; serialisable via ``to_dict``."""

    # Tool version and identification of the target.
    fossil_version: str
    target: str
    abs_path: str
    repo_root: str
    language: str
    # Verdict.
    dead: bool
    status: str
    # Per-stage results.
    static_analysis: StaticAnalysisResult
    git_history: GitHistoryResult
    temporary_hold: PatternResult
    confidence: ConfidenceResult | None
    # Recommendations; ``None`` when there is nothing to suggest.
    suggested_action: str | None
    yolo_command: str | None
    # Run metadata.
    analysis_duration_ms: int
    cached: bool = False
    warnings: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report, including nested dataclasses, as plain dictionaries."""
        return asdict(self)
|
fossil/patterns.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from datetime import date, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from fossil.models import HoldPattern, PatternResult
|
|
8
|
+
from fossil.repo import run_git
|
|
9
|
+
|
|
10
|
+
# Case-insensitive markers of code that is being kept "for now".  Patterns that
# define a ``condition`` group capture the rest of the line after the marker.
PATTERNS = [
    # "TODO: remove after <condition>"
    re.compile(r"TODO:\s*remove after\s+(?P<condition>.+)", re.IGNORECASE),
    # "FIXME: delete when <condition>"
    re.compile(r"FIXME:\s*delete when\s+(?P<condition>.+)", re.IGNORECASE),
    # "keep/keeping [this] [around] for now" or "... until <condition>"
    re.compile(
        r"keep(?:ing)? (?:this )?(?:around )?(?:for now|until\s+(?P<condition>.+))",
        re.IGNORECASE,
    ),
    # Bare "temporary" / "temp code" / "temp fix" (no condition captured)
    re.compile(r"\btemporary\b|\btemp code\b|\btemp fix\b", re.IGNORECASE),
    # Deprecation markers (no condition captured)
    re.compile(r"\bDEPRECATED\b|@deprecated", re.IGNORECASE),
    # "will be removed in <condition>"
    re.compile(r"will be removed in\s+(?P<condition>.+)", re.IGNORECASE),
]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def detect_patterns(path: Path, repo_root: Path) -> PatternResult:
    """Scan ``path`` line by line for hold markers and verify their conditions.

    Every regex in ``PATTERNS`` is tried against every line, so one line can
    yield several ``HoldPattern`` entries.  The file is read as UTF-8 with
    undecodable bytes replaced.

    Args:
        path: File to scan.
        repo_root: Repository root handed to ``verify_condition``.

    Returns:
        A ``PatternResult`` whose ``detected`` flag is true when at least one
        marker matched.
    """
    result = PatternResult(detected=False)
    text = path.read_text(encoding="utf-8", errors="replace")
    # verify_condition may spawn git; check each distinct condition only once.
    verdicts: dict[str, tuple[str, bool | None, str]] = {}
    for line_no, line in enumerate(text.splitlines(), 1):
        for regex in PATTERNS:
            match = regex.search(line)
            if not match:
                continue
            captured = match.groupdict().get("condition") or ""
            # Trim punctuation, but strip "#" from the right only: a leading
            # "#" belongs to references such as "#123" and must survive so
            # the condition can still be recognised as a PR reference.
            condition = captured.lstrip(" .").rstrip(" .#")
            if condition not in verdicts:
                verdicts[condition] = verify_condition(condition, repo_root)
            kind, met, evidence = verdicts[condition]
            result.patterns.append(
                HoldPattern(
                    text=line.strip(),
                    line=line_no,
                    condition=condition or None,
                    condition_type=kind,
                    condition_met=met,
                    evidence=evidence,
                )
            )
    result.detected = bool(result.patterns)
    return result
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def verify_condition(condition: str, repo_root: Path) -> tuple[str, bool | None, str]:
    """Classify a hold condition and try to decide whether it has been met.

    Checks are attempted in this order, and the first that applies wins:

    * PR reference ("PR 12", "#12"): met when a commit message on any ref
      mentions exactly that number; otherwise undetermined.
    * Version ("2.0", "v1.4.2"): met when a git tag contains exactly that
      version.
    * ISO-style date ("2025-01-31", "2025/01/31"): met when the date is today
      or earlier.

    Args:
        condition: Condition text taken from a hold marker; may be empty.
        repo_root: Repository in which git is queried.

    Returns:
        ``(condition_type, condition_met, evidence)``.  ``condition_type`` is
        one of "pr", "version", "date" or "unverifiable"; ``condition_met`` is
        ``None`` when it cannot be decided locally.
    """
    if not condition:
        return "unverifiable", None, "No explicit condition found."

    # \b keeps "PR" from matching inside words ("Apr 2025" is not PR 2025).
    pr = re.search(r"(?:\bPR|#)\s*(\d+)", condition, re.IGNORECASE)
    if pr:
        number = pr.group(1)
        log = run_git(
            repo_root,
            [
                "log",
                "--all",
                "--max-count=1",  # existence is all that matters
                "--extended-regexp",
                "--grep",
                # Require a non-digit (or end of line) after the number so
                # that "#12" is not satisfied by "#123".
                f"#{number}([^0-9]|$)",
                "--format=%H %s",
            ],
            check=False,
        )
        if log.stdout.strip():
            return "pr", True, f"Found commit message referencing #{number}."
        return "pr", None, f"PR #{number} requires remote API verification."

    version = re.search(r"\bv?(\d+\.\d+(?:\.\d+)?)\b", condition)
    if version:
        wanted = version.group(1)
        tags = run_git(repo_root, ["tag", "--list", f"*{wanted}*"], check=False)
        # The glob is only a coarse pre-filter: "*1.2*" also lists "v11.2",
        # "1.20" and "0.1.2".  Keep tags where the version is not embedded in
        # a longer number or dotted sequence.
        exact = re.compile(rf"(?<![\d.]){re.escape(wanted)}(?!\d)")
        matching = [tag for tag in tags.stdout.splitlines() if exact.search(tag)]
        if matching:
            return "version", True, f"Matching git tag found: {matching[0]}."
        return "version", False, f"No git tag matched version {wanted}."

    parsed = _parse_date(condition)
    if parsed:
        if parsed <= date.today():
            return "date", True, f"Date {parsed.isoformat()} has passed."
        return "date", False, f"Date {parsed.isoformat()} has not passed."

    return "unverifiable", None, f'Condition: UNVERIFIABLE — "{condition}"'
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _parse_date(value: str) -> date | None:
|
|
73
|
+
for token in re.findall(r"\d{4}-\d{2}-\d{2}|\d{4}/\d{2}/\d{2}", value):
|
|
74
|
+
for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
|
|
75
|
+
try:
|
|
76
|
+
return datetime.strptime(token, fmt).date()
|
|
77
|
+
except ValueError:
|
|
78
|
+
pass
|
|
79
|
+
return None
|
fossil/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|