fossil-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fossil/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Dead-code forensics CLI."""
2
+
3
# Version string exposed by the package.
__version__ = "0.2.0"
fossil/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from fossil.cli import main
2
+
3
# Invoke the CLI entry point only when this module is executed as a script
# (for example via ``python -m fossil``), never on a plain import.
if __name__ == "__main__":
    main()
fossil/analyzers.py ADDED
@@ -0,0 +1,221 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ import fnmatch
5
+ import re
6
+ from pathlib import Path
7
+
8
+ from fossil.models import Reference, StaticAnalysisResult
9
+ from fossil.repo import relpath
10
+
11
# Maps a lower-cased file suffix to the language label returned by
# language_for(); any suffix not listed here is reported as "unknown".
SOURCE_EXTENSIONS = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".java": "java",
    ".go": "go",
}

# Suffixes whose textual matches _scan_text() records with kind "doc".
DOC_EXTENSIONS = {".md", ".rst", ".txt", ".adoc"}
# Suffixes whose textual matches _scan_text() records with kind "config".
CONFIG_EXTENSIONS = {".toml", ".yaml", ".yml", ".json", ".ini", ".cfg"}
# A file is skipped by iter_repo_files() when any component of its
# repo-relative path appears in this set.
SKIP_PARTS = {".git", ".fossil", "__pycache__", "node_modules", "dist", "build", ".venv", "venv"}
24
+
25
+
26
def language_for(path: Path) -> str:
    """Return the language label for *path* based on its suffix.

    The suffix is compared case-insensitively against ``SOURCE_EXTENSIONS``;
    a suffix that is not listed there yields ``"unknown"``.
    """
    extension = path.suffix.lower()
    if extension in SOURCE_EXTENSIONS:
        return SOURCE_EXTENSIONS[extension]
    return "unknown"
28
+
29
+
30
def iter_repo_files(repo_root: Path, exclude: list[str] | None = None) -> list[Path]:
    """Collect the regular files under *repo_root* that survive filtering.

    A file is dropped when any component of its repo-relative path is listed
    in ``SKIP_PARTS``, or when its POSIX-style relative path matches one of
    the ``fnmatch`` patterns given in *exclude*.

    Args:
        repo_root: Directory that is walked recursively.
        exclude: Optional ``fnmatch`` patterns tested against each file's
            repo-relative POSIX path. ``None`` means no patterns.

    Returns:
        The kept paths, in the order ``Path.rglob`` produced them.
    """
    patterns = exclude or []

    def _is_wanted(candidate: Path) -> bool:
        # Directories and other non-regular entries are never reported.
        if not candidate.is_file():
            return False
        relative = candidate.relative_to(repo_root)
        if not SKIP_PARTS.isdisjoint(relative.parts):
            return False
        posix_rel = relative.as_posix()
        return not any(fnmatch.fnmatch(posix_rel, pattern) for pattern in patterns)

    return [candidate for candidate in repo_root.rglob("*") if _is_wanted(candidate)]
43
+
44
+
45
def exported_symbols(path: Path) -> set[str]:
    """Return the public top-level names defined by *path*, plus its stem.

    For a ``.py`` file the module body is parsed and every top-level
    function, async function, class, plain assignment target and annotated
    assignment (``NAME: type = value``) whose name does not start with an
    underscore is collected. Non-Python files, and Python files that cannot
    be decoded or parsed, contribute only the file stem.

    Args:
        path: File whose exported names are wanted.

    Returns:
        A set that always contains ``path.stem``.
    """
    symbols = {path.stem}
    if path.suffix != ".py":
        return symbols
    try:
        tree = ast.parse(path.read_text(encoding="utf-8"))
    except (SyntaxError, ValueError):
        # ValueError covers UnicodeDecodeError (a subclass) as well as the
        # "source code string cannot contain null bytes" error that
        # ast.parse raises on interpreters older than 3.12.
        return symbols

    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            names = [node.name]
        elif isinstance(node, ast.Assign):
            names = [target.id for target in node.targets if isinstance(target, ast.Name)]
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # A bare annotation (``x: int``) binds nothing, so only annotated
            # assignments that carry a value count as definitions.
            names = [node.target.id] if isinstance(node.target, ast.Name) else []
        else:
            continue
        symbols.update(name for name in names if not name.startswith("_"))
    return symbols
62
+
63
+
64
def module_names(path: Path, repo_root: Path) -> set[str]:
    """Return the names under which *path* might be imported.

    The result holds the file stem and the dotted form of the repo-relative
    path without its suffix. For a nested ``__init__`` file the dotted name
    of the containing package is included as well. Empty names are dropped.
    """
    components = path.relative_to(repo_root).with_suffix("").parts
    candidates = [path.stem, ".".join(components)]
    if len(components) > 1 and components[-1] == "__init__":
        # ``pkg/__init__.py`` is also importable as plain ``pkg``.
        candidates.append(".".join(components[:-1]))
    return {candidate for candidate in candidates if candidate}
71
+
72
+
73
def analyze_file(
    path: Path, repo_root: Path, exclude: list[str] | None = None
) -> StaticAnalysisResult:
    """Scan the repository for references to *path*.

    Every other file returned by ``iter_repo_files`` is read as UTF-8 and
    searched for the target's module names and exported symbols: ``.py``
    files go through ``_scan_python``, everything else through
    ``_scan_text``. A final pass, ``_scan_dynamic_and_reflection``, looks for
    dynamic-import and reflection patterns.

    Args:
        path: The file whose usages are being looked up.
        repo_root: Root directory of the repository being scanned.
        exclude: Optional ``fnmatch`` patterns forwarded to
            ``iter_repo_files``.

    Returns:
        The populated ``StaticAnalysisResult``.
    """
    language = language_for(path)
    symbols = exported_symbols(path)
    modules = module_names(path, repo_root)
    result = StaticAnalysisResult(language=language, unknown_language=language == "unknown")
    files = iter_repo_files(repo_root, exclude)
    target_rel = relpath(path, repo_root)
    # Resolve the target once instead of once per candidate file.
    target_resolved = path.resolve()

    for other in files:
        if other.resolve() == target_resolved:
            continue
        rel = relpath(other, repo_root)
        try:
            text = other.read_text(encoding="utf-8")
        except (UnicodeDecodeError, OSError):
            # Binary files and files that cannot be read (permissions,
            # removed mid-scan) are skipped rather than aborting the scan.
            continue
        if other.suffix == ".py":
            _scan_python(other, rel, text, modules, symbols, result)
        else:
            _scan_text(other, rel, text, modules, symbols, result, target_rel)
    _scan_dynamic_and_reflection(files, path, repo_root, modules, result)
    return result
97
+
98
+
99
def _add_ref(result: StaticAnalysisResult, path: str, line: int, kind: str, text: str) -> None:
    """Append a reference to *result* and bump the matching counter.

    The stored text is stripped and truncated to 240 characters. A reference
    located in a test path increments ``test_file_references`` regardless of
    *kind*; otherwise the counter is selected by *kind*, and an unrecognised
    kind increments nothing.
    """
    result.references.append(
        Reference(path=path, line=line, kind=kind, text=text.strip()[:240])
    )
    if _is_test_path(path):
        result.test_file_references += 1
        return

    counter_by_kind = {
        "import": "import_references",
        "call": "call_sites",
        "doc": "documentation_references",
        "config": "config_file_references",
    }
    counter = counter_by_kind.get(kind)
    if counter is not None:
        setattr(result, counter, getattr(result, counter) + 1)
112
+
113
+
114
+ def _is_test_path(path: str) -> bool:
115
+ lower = path.lower()
116
+ return "/test" in lower or lower.startswith("test") or "_test." in lower
117
+
118
+
119
def _scan_python(
    path: Path,
    rel: str,
    text: str,
    modules: set[str],
    symbols: set[str],
    result: StaticAnalysisResult,
) -> None:
    """Record imports, calls and name uses of the target found in one Python file.

    The file is parsed with ``ast``. If parsing fails, the text is handed to
    ``_scan_text`` instead. References are added through ``_add_ref``:

    * ``import``: an ``import``/``from ... import`` whose module is one of
      *modules* (or ends with ``"." + module``), or a ``from`` import that
      pulls in a name listed in *symbols*.
    * ``call``: a call whose dotted callee name is in *symbols* or whose
      first component is an alias bound by a matching import, and any other
      ``Name`` node whose identifier is in *symbols*.

    Each matched call expression is recorded once: the ``Name`` at the root
    of an already-recorded callee is not reported a second time.

    Args:
        path: The file being scanned (used for the text fallback).
        rel: Repo-relative path stored on each reference.
        text: Full contents of the file.
        modules: Importable names of the target.
        symbols: Exported names of the target.
        result: Accumulator that receives the references.
    """
    lines = text.splitlines()
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        # ValueError: interpreters older than 3.12 reject NUL bytes this way.
        _scan_text(path, rel, text, modules, symbols, result, "")
        return

    def targets_module(dotted: str) -> bool:
        return dotted in modules or any(dotted.endswith("." + m) for m in modules)

    imported_aliases: set[str] = set()
    # ids of callee root Name nodes that were already reported via their Call.
    # ast.walk discovers children through their parent, so a Call is always
    # visited before the Name nodes nested inside its ``func``.
    reported_callee_roots: set[int] = set()

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                if targets_module(alias.name):
                    imported_aliases.add(alias.asname or alias.name.split(".")[0])
                    _add_ref(result, rel, node.lineno, "import", lines[node.lineno - 1])
        elif isinstance(node, ast.ImportFrom) and node.module:
            if targets_module(node.module):
                for alias in node.names:
                    imported_aliases.add(alias.asname or alias.name)
                _add_ref(result, rel, node.lineno, "import", lines[node.lineno - 1])
            elif any(alias.name in symbols for alias in node.names):
                _add_ref(result, rel, node.lineno, "import", lines[node.lineno - 1])
        elif isinstance(node, ast.Call):
            name = _call_name(node.func)
            if name and (name in symbols or name.split(".")[0] in imported_aliases):
                _add_ref(result, rel, node.lineno, "call", lines[node.lineno - 1])
                root = node.func
                while isinstance(root, ast.Attribute):
                    root = root.value
                if isinstance(root, ast.Name):
                    reported_callee_roots.add(id(root))
        elif (
            isinstance(node, ast.Name)
            and node.id in symbols
            and id(node) not in reported_callee_roots
        ):
            lineno = getattr(node, "lineno", 1)
            _add_ref(result, rel, lineno, "call", lines[lineno - 1])
159
+
160
+
161
+ def _call_name(node: ast.AST) -> str | None:
162
+ if isinstance(node, ast.Name):
163
+ return node.id
164
+ if isinstance(node, ast.Attribute):
165
+ base = _call_name(node.value)
166
+ return f"{base}.{node.attr}" if base else node.attr
167
+ return None
168
+
169
+
170
+ def _scan_text(
171
+ path: Path,
172
+ rel: str,
173
+ text: str,
174
+ modules: set[str],
175
+ symbols: set[str],
176
+ result: StaticAnalysisResult,
177
+ target_rel: str,
178
+ ) -> None:
179
+ needles = sorted(
180
+ modules | symbols | ({target_rel} if target_rel else set()), key=len, reverse=True
181
+ )
182
+ if not needles:
183
+ return
184
+ pattern = re.compile(r"\b(" + "|".join(re.escape(n) for n in needles if n) + r")\b")
185
+ kind = (
186
+ "doc"
187
+ if path.suffix.lower() in DOC_EXTENSIONS
188
+ else "config"
189
+ if path.suffix.lower() in CONFIG_EXTENSIONS
190
+ else "call"
191
+ )
192
+ for idx, line in enumerate(text.splitlines(), 1):
193
+ if pattern.search(line):
194
+ _add_ref(result, rel, idx, kind, line)
195
+
196
+
197
+ def _scan_dynamic_and_reflection(
198
+ files: list[Path],
199
+ target: Path,
200
+ repo_root: Path,
201
+ modules: set[str],
202
+ result: StaticAnalysisResult,
203
+ ) -> None:
204
+ dynamic_re = re.compile(r"(importlib\.import_module|__import__)\(([^)]*)\)")
205
+ reflection_re = re.compile(r"\b(getattr|hasattr|setattr|vars)\(([^)]*)\)")
206
+ module_re = re.compile("|".join(re.escape(m) for m in sorted(modules, key=len, reverse=True)))
207
+ if not modules:
208
+ return
209
+ for path in files:
210
+ if path.resolve() == target.resolve():
211
+ continue
212
+ try:
213
+ text = path.read_text(encoding="utf-8")
214
+ except UnicodeDecodeError:
215
+ continue
216
+ rel = relpath(path, repo_root)
217
+ for idx, line in enumerate(text.splitlines(), 1):
218
+ if dynamic_re.search(line) and module_re.search(line):
219
+ result.dynamic_references.append(Reference(rel, idx, "dynamic", line.strip()))
220
+ if reflection_re.search(line) and module_re.search(line):
221
+ result.reflection_patterns.append(Reference(rel, idx, "reflection", line.strip()))
fossil/cache.py ADDED
@@ -0,0 +1,228 @@
1
+ """Local SQLite result cache.
2
+
3
+ Implements §3.5 of the pre-development docs:
4
+ - analysis_results table for per-file results
5
+ - scan_results table for directory scan results
6
+ - pr_cache table for GitHub/GitLab PR lookups
7
+ - schema_version for future migration support
8
+ - Auto-prune entries older than cache_ttl_hours when cache exceeds 100MB
9
+ - Corruption detection and silent rebuild
10
+ """
11
+
12
from __future__ import annotations

import json
import sqlite3
import time
from contextlib import closing
from pathlib import Path
from typing import Any
19
+
20
# Value written to the schema_version table when a database is first created.
SCHEMA_VERSION = 2

# DDL executed on every connection; every statement is idempotent.
SCHEMA = """\
CREATE TABLE IF NOT EXISTS analysis_results (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    file_path TEXT NOT NULL,
    git_head_hash TEXT NOT NULL,
    repo_root TEXT NOT NULL,
    result_json TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    fossil_version TEXT NOT NULL,
    UNIQUE(file_path, git_head_hash, repo_root)
);
CREATE TABLE IF NOT EXISTS scan_results (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    repo_root TEXT NOT NULL,
    scan_target TEXT NOT NULL,
    git_head_hash TEXT NOT NULL,
    result_json TEXT NOT NULL,
    created_at INTEGER NOT NULL,
    UNIQUE(repo_root, scan_target, git_head_hash)
);
CREATE TABLE IF NOT EXISTS pr_cache (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    remote_url TEXT NOT NULL,
    pr_number INTEGER NOT NULL,
    pr_title TEXT,
    pr_body TEXT,
    merged_at TEXT,
    cached_at INTEGER NOT NULL,
    UNIQUE(remote_url, pr_number)
);
CREATE TABLE IF NOT EXISTS schema_version (version INTEGER);
"""

MAX_CACHE_BYTES = 100 * 1024 * 1024  # 100 MB: file size that triggers pruning
MAX_RESULT_BYTES = 5 * 1024 * 1024  # 5 MB: larger JSON payloads are not cached
DEFAULT_TTL_HOURS = 24  # default age beyond which pruning deletes a row


class CacheStore:
    """SQLite-backed result cache stored at ``<repo_root>/.fossil/cache.db``.

    Every operation opens its own connection and closes it before returning.
    Analysis and scan operations treat any ``sqlite3.DatabaseError`` as a
    corrupt cache and delete the database file; the next call rebuilds it.
    """

    def __init__(self, repo_root: Path):
        self.path = repo_root / ".fossil" / "cache.db"

    def _open(self) -> sqlite3.Connection:
        """Open the database, apply the schema, and close the handle on failure."""
        conn = sqlite3.connect(self.path)
        try:
            conn.executescript(SCHEMA)
            # Record the schema version only the first time the file is used.
            row = conn.execute("SELECT version FROM schema_version LIMIT 1").fetchone()
            if row is None:
                conn.execute("INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,))
            conn.commit()
        except Exception:
            # Never leave a handle open on a file that is about to be removed.
            conn.close()
            raise
        return conn

    def _connect(self) -> sqlite3.Connection:
        """Return an open, schema-initialised connection.

        If the first attempt raises ``sqlite3.DatabaseError`` the database
        file is deleted and a fresh one is created. The caller owns the
        returned connection and must close it.

        Raises:
            sqlite3.DatabaseError: If the rebuilt database fails as well.
        """
        self.path.parent.mkdir(exist_ok=True)
        try:
            return self._open()
        except sqlite3.DatabaseError:
            # Corruption detected: rebuild from scratch.
            self.clear()
            return self._open()

    # ── Analysis result CRUD ──

    def get_analysis(self, file_path: Path, head: str, repo_root: Path) -> dict[str, Any] | None:
        """Return the cached analysis for the given key, or ``None`` on a miss.

        A database error clears the cache and is reported as a miss.
        """
        try:
            with closing(self._connect()) as conn:
                row = conn.execute(
                    "SELECT result_json FROM analysis_results WHERE file_path=? AND git_head_hash=? AND repo_root=?",
                    (str(file_path), head, str(repo_root)),
                ).fetchone()
        except sqlite3.DatabaseError:
            self.clear()
            return None
        return json.loads(row[0]) if row else None

    def put_analysis(
        self, file_path: Path, head: str, repo_root: Path, version: str, result: dict[str, Any]
    ) -> None:
        """Store *result* for the given key, replacing any previous entry.

        Payloads whose JSON encoding exceeds ``MAX_RESULT_BYTES`` are silently
        not cached. A pruning pass runs after the write attempt.
        """
        payload = json.dumps(result, sort_keys=True)
        if len(payload.encode("utf-8")) > MAX_RESULT_BYTES:
            return
        try:
            # ``closing`` releases the handle; the inner ``conn`` context
            # commits on success and rolls back on error.
            with closing(self._connect()) as conn, conn:
                conn.execute(
                    """
                    INSERT OR REPLACE INTO analysis_results
                    (file_path, git_head_hash, repo_root, result_json, created_at, fossil_version)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (str(file_path), head, str(repo_root), payload, int(time.time()), version),
                )
        except sqlite3.DatabaseError:
            self.clear()
        self._auto_prune()

    # ── Scan result CRUD ──

    def get_scan(self, scan_target: str, head: str, repo_root: Path) -> list[dict[str, Any]] | None:
        """Return the cached scan result for the given key, or ``None`` on a miss.

        A database error clears the cache and is reported as a miss.
        """
        try:
            with closing(self._connect()) as conn:
                row = conn.execute(
                    "SELECT result_json FROM scan_results WHERE scan_target=? AND git_head_hash=? AND repo_root=?",
                    (scan_target, head, str(repo_root)),
                ).fetchone()
        except sqlite3.DatabaseError:
            self.clear()
            return None
        return json.loads(row[0]) if row else None

    def put_scan(
        self, scan_target: str, head: str, repo_root: Path, result: list[dict[str, Any]]
    ) -> None:
        """Store a scan result, replacing any previous entry for the same key.

        Payloads whose JSON encoding exceeds ``MAX_RESULT_BYTES`` are silently
        not cached.
        """
        payload = json.dumps(result, sort_keys=True)
        if len(payload.encode("utf-8")) > MAX_RESULT_BYTES:
            return
        try:
            with closing(self._connect()) as conn, conn:
                conn.execute(
                    """
                    INSERT OR REPLACE INTO scan_results
                    (repo_root, scan_target, git_head_hash, result_json, created_at)
                    VALUES (?, ?, ?, ?, ?)
                    """,
                    (str(repo_root), scan_target, head, payload, int(time.time())),
                )
        except sqlite3.DatabaseError:
            self.clear()

    # ── PR cache CRUD ──

    def get_pr(self, remote_url: str, pr_number: int) -> dict[str, Any] | None:
        """Return the cached PR record, or ``None`` on a miss or database error.

        Unlike the analysis and scan lookups, an error here leaves the cache
        file in place.
        """
        try:
            with closing(self._connect()) as conn:
                row = conn.execute(
                    "SELECT pr_title, pr_body, merged_at FROM pr_cache WHERE remote_url=? AND pr_number=?",
                    (remote_url, pr_number),
                ).fetchone()
        except sqlite3.DatabaseError:
            return None
        if row is None:
            return None
        return {"pr_title": row[0], "pr_body": row[1], "merged_at": row[2]}

    def put_pr(
        self,
        remote_url: str,
        pr_number: int,
        title: str | None,
        body: str | None,
        merged_at: str | None,
    ) -> None:
        """Store a PR record, replacing any previous entry for the same PR.

        Caching is best effort: a database error is ignored and the cache
        file is left untouched.
        """
        try:
            with closing(self._connect()) as conn, conn:
                conn.execute(
                    """
                    INSERT OR REPLACE INTO pr_cache
                    (remote_url, pr_number, pr_title, pr_body, merged_at, cached_at)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (remote_url, pr_number, title, body, merged_at, int(time.time())),
                )
        except sqlite3.DatabaseError:
            # Deliberately best effort: a failed PR write only costs a re-fetch.
            pass

    # ── Cache management ──

    def clear(self) -> None:
        """Delete the cache database file if it exists."""
        if self.path.exists():
            self.path.unlink()

    def stats(self) -> dict[str, Any]:
        """Return the file size and per-table row counts.

        All values are zero when the cache file is missing or unreadable.
        """
        empty = {"size_bytes": 0, "analysis_count": 0, "scan_count": 0, "pr_count": 0}
        if not self.path.exists():
            return empty
        try:
            size = self.path.stat().st_size
            with closing(self._connect()) as conn:
                analysis_count = conn.execute("SELECT COUNT(*) FROM analysis_results").fetchone()[0]
                scan_count = conn.execute("SELECT COUNT(*) FROM scan_results").fetchone()[0]
                pr_count = conn.execute("SELECT COUNT(*) FROM pr_cache").fetchone()[0]
        except (sqlite3.DatabaseError, OSError):
            return empty
        return {
            "size_bytes": size,
            "analysis_count": analysis_count,
            "scan_count": scan_count,
            "pr_count": pr_count,
        }

    def _auto_prune(
        self, ttl_hours: int = DEFAULT_TTL_HOURS, max_bytes: int = MAX_CACHE_BYTES
    ) -> None:
        """Delete rows older than *ttl_hours* once the file reaches *max_bytes*.

        Args:
            ttl_hours: Rows whose timestamp is older than this many hours are
                removed from all three tables.
            max_bytes: Pruning is skipped while the database file is smaller
                than this size.
        """
        try:
            if self.path.stat().st_size < max_bytes:
                return
        except OSError:
            # Missing or unreadable file: nothing to prune.
            return
        cutoff = int(time.time()) - (ttl_hours * 3600)
        try:
            with closing(self._connect()) as conn:
                with conn:
                    conn.execute("DELETE FROM analysis_results WHERE created_at < ?", (cutoff,))
                    conn.execute("DELETE FROM scan_results WHERE created_at < ?", (cutoff,))
                    conn.execute("DELETE FROM pr_cache WHERE cached_at < ?", (cutoff,))
                # VACUUM cannot run inside a transaction, so it is issued only
                # after the deletes above have been committed.
                conn.execute("VACUUM")
        except sqlite3.DatabaseError:
            self.clear()