deadpush 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deadpush/config.py ADDED
@@ -0,0 +1,284 @@
1
+ """
2
+ Configuration loading for deadpush.
3
+
4
+ Supports:
5
+ - Auto-detection of repo root (.git, pyproject.toml, etc.)
6
+ - Language enablement
7
+ - Entrypoint configuration
8
+ - Debris blocking/warning rules
9
+ - Custom ignore patterns (merged with .gitignore etc.)
10
+ - Optional loading from pyproject.toml [tool.deadpush] or .deadpush.toml
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any
19
+ import tomllib
20
+ import pathspec
21
+
22
+
23
# Canonical language identifiers recognised by deadpush. Kept as a list so
# callers that copy it (e.g. as a default language selection) get a list back.
SUPPORTED_LANGUAGES = ["python", "typescript", "javascript", "go", "rust", "cpp", "java"]
32
+
33
+
34
def _default_dynamic_patterns() -> list[str]:
    """Return a fresh list of the built-in dynamic entry point regexes."""
    return [
        r"main\b",
        r"__main__",
        r"if __name__",
        r"app\.run",
        r"server\.(start|listen)",
        r"cli\.",
    ]


@dataclass
class EntrypointsConfig:
    """Settings that control how entry points are detected."""

    # Explicitly configured entry points; empty unless overridden.
    include: list[str] = field(default_factory=list)
    # Regular expressions; each instance receives its own copy of the defaults.
    dynamic_patterns: list[str] = field(default_factory=_default_dynamic_patterns)
42
+
43
+
44
def _default_block_categories() -> set[str]:
    """Return a fresh set of the debris categories blocked by default."""
    return {"hardcoded_secret", "llm_context_file", "chat_export"}


def _default_warn_categories() -> set[str]:
    """Return a fresh set of the debris categories that warn by default."""
    return {
        "vibe_scratchpad",
        "duplicate_file",
        "ai_regenerated_duplicate",
        "dev_artifact",
        "env_file",
        "silent_failure",
        "hallucinated_import",
        "weak_test",
        "no_assertions",
        "tautology",
        "empty_test",
        "prompt_injection",
    }


@dataclass
class DebrisConfig:
    """Per-category rules for debris findings."""

    # Category names listed here are treated as blocking.
    block_categories: set[str] = field(default_factory=_default_block_categories)
    # Category names listed here are treated as warnings.
    warn_categories: set[str] = field(default_factory=_default_warn_categories)
56
+
57
+
58
@dataclass
class DeadCodeConfig:
    """Settings for the dead code analysis (multi-factor scoring)."""

    # Confidence threshold as a string; defaults to "high".
    min_confidence: str = "high"
    # Off by default.
    show_uncertain: bool = False
    # Empty unless configured; every instance gets its own list.
    custom_registrations: list[str] = field(default_factory=lambda: [])
64
+
65
+
66
@dataclass
class TestConfig:
    """Configuration for post-write test verification."""

    # The class name matches pytest's default ``Test*`` collection pattern.
    # Marking it as a non-test keeps pytest from trying to collect it (and
    # warning about its ``__init__``) whenever it is imported into a test
    # module. Unannotated, so it is a plain class attribute and not a field.
    __test__ = False

    # Command used to run the tests.
    command: str = "pytest"
    # Time limit for the test command, in seconds.
    timeout_seconds: int = 30
    # Whether test verification is switched on.
    enabled: bool = True
72
+
73
+
74
def _default_blocked_files() -> list[str]:
    """Return a fresh list of the file names blocked by default."""
    return [
        "claude.md",
        ".cursorrules",
        ".claude_instructions",
        ".copilot-instructions.md",
        "windsurf_rules.md",
    ]


@dataclass
class BlockConfig:
    """Files and patterns that must always be blocked from writes."""

    # File names that are blocked outright.
    blocked_files: list[str] = field(default_factory=_default_blocked_files)
    # Additional patterns; empty unless configured.
    blocked_patterns: list[str] = field(default_factory=list)
85
+
86
+
87
+ def _load_deadpush_toml(root: Path) -> dict[str, Any]:
88
+ """Load .deadpush.toml from project root. Returns {} if missing."""
89
+ dp_paths = [
90
+ root / "deadpush.toml",
91
+ root / ".deadpush.toml",
92
+ root / ".deadpush" / "config.toml",
93
+ ]
94
+ for dp in dp_paths:
95
+ if dp.exists():
96
+ try:
97
+ return tomllib.loads(dp.read_text(encoding="utf-8"))
98
+ except Exception:
99
+ return {}
100
+ return {}
101
+
102
+
103
@dataclass
class Config:
    """Main deadpush configuration object passed around the system.

    Only ``repo_root`` is required; every other field has a default and each
    instance receives its own copy of the mutable defaults.
    """

    repo_root: Path
    languages: list[str] = field(default_factory=lambda: list(SUPPORTED_LANGUAGES))
    entrypoints: EntrypointsConfig = field(default_factory=EntrypointsConfig)
    debris: DebrisConfig = field(default_factory=DebrisConfig)
    dead_code: DeadCodeConfig = field(default_factory=DeadCodeConfig)
    test: TestConfig = field(default_factory=TestConfig)
    block: BlockConfig = field(default_factory=BlockConfig)
    # Built-in ignore patterns; merged with .gitignore by
    # get_effective_ignore_spec().
    ignore_patterns: list[str] = field(default_factory=lambda: [
        "__pycache__/", ".git/", "node_modules/", ".deadpush-archive/",
        ".venv/", "venv/", "dist/", "build/", "*.pyc", ".mypy_cache/",
        "target/", "Cargo.lock", "package-lock.json",
    ])
    max_file_size_mb: int = 5
    control_port: int = 14242
    # Sensitive config files that trigger warnings when modified
    sensitive_config_patterns: list[str] = field(default_factory=lambda: [
        "Dockerfile*", "docker-compose*", ".dockerignore",
        ".github/workflows/*", ".gitlab-ci.yml", "Jenkinsfile*",
        "k8s/*.yaml", "k8s/*.yml", "deploy/*.yaml", "deploy/*.yml",
        "terraform/*.tf", "*.tfvars",
        "cloudbuild.yaml", "app.yaml", "cron.yaml",
        "Procfile", "systemd/*.service", "*.plist",
        "nginx.conf", "nginx/*.conf", ".env.production", ".env.staging",
    ])

    def is_language_enabled(self, name: str) -> bool:
        """Return True if *name* (or its alias) is among the enabled languages.

        Matching is case-insensitive. ``ts``, ``js`` and ``c++`` are accepted
        as aliases. A configured entry also matches on its first
        whitespace-separated word.
        """
        aliases = {"ts": "typescript", "js": "javascript", "c++": "cpp"}
        wanted = name.lower()
        wanted = aliases.get(wanted, wanted)
        for configured in self.languages:
            entry = configured.lower()
            if wanted == entry:
                return True
            # An empty or whitespace-only entry has no first word; indexing
            # the split result unconditionally would raise IndexError.
            words = entry.split()
            if words and wanted == words[0]:
                return True
        return False

    def should_block_debris_category(self, category: str) -> bool:
        """Return True if *category* is listed in ``debris.block_categories``."""
        return category in self.debris.block_categories

    def should_warn_debris_category(self, category: str) -> bool:
        """Return True if *category* is listed in ``debris.warn_categories``."""
        return category in self.debris.warn_categories

    def is_sensitive_config(self, rel_path: str) -> bool:
        """Check if a relative file path matches a sensitive config pattern.

        Backslashes are normalised to ``/`` first. Each pattern is tried both
        as-is and prefixed with ``**/`` so that it can also match below a
        parent directory.
        """
        from fnmatch import fnmatch

        rp = rel_path.replace("\\", "/")
        return any(
            fnmatch(rp, pat) or fnmatch(rp, "**/" + pat)
            for pat in self.sensitive_config_patterns
        )

    def get_effective_ignore_spec(self) -> "pathspec.PathSpec":
        """Build the pathspec used for filtering files.

        Combines ``ignore_patterns`` with the non-blank, non-comment lines of
        ``<repo_root>/.gitignore`` when that file can be read.
        """
        import pathspec

        patterns = list(self.ignore_patterns)
        gi = self.repo_root / ".gitignore"
        if gi.exists():
            try:
                # Decode as UTF-8 explicitly so the result does not depend on
                # the platform locale; undecodable bytes are dropped.
                content = gi.read_text(encoding="utf-8", errors="ignore")
            except OSError:
                # Best effort: an unreadable .gitignore contributes nothing.
                content = ""
            for raw in content.splitlines():
                line = raw.strip()
                if line and not line.startswith("#"):
                    patterns.append(line)
        return pathspec.PathSpec.from_lines("gitwildmatch", patterns)

    def is_blocked(self, rel_path: str) -> bool:
        """Check if a relative file path matches any blocked file/pattern.

        File names are compared case-insensitively: the default list is
        written in lower case (``claude.md``) while such files are commonly
        created as ``CLAUDE.md``, and on case-insensitive filesystems both
        spellings refer to the same file. Patterns are matched with
        ``fnmatch`` against both the full relative path and the bare name.
        """
        from fnmatch import fnmatch

        rp = rel_path.replace("\\", "/")
        name = Path(rp).name
        blocked_names = {entry.lower() for entry in self.block.blocked_files}
        if name.lower() in blocked_names:
            return True
        return any(
            fnmatch(rp, pat) or fnmatch(name, pat)
            for pat in self.block.blocked_patterns
        )

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the repo root, languages and entrypoints.

        Only these three sections are included; the other fields are not
        part of the returned mapping.
        """
        return {
            "repo_root": str(self.repo_root),
            "languages": self.languages,
            "entrypoints": {
                "include": self.entrypoints.include,
                "dynamic_patterns": self.entrypoints.dynamic_patterns,
            },
        }
194
+
195
+
196
def _find_repo_root(start: Path | None = None) -> Path:
    """Locate the closest ancestor directory that looks like a repo root.

    Begins at *start* (the current working directory when omitted, or the
    home directory if the cwd no longer exists) and climbs towards the
    filesystem root. The first directory containing one of the known marker
    entries is returned; if none is found the resolved start is returned.
    """
    if start is None:
        try:
            start = Path.cwd()
        except FileNotFoundError:
            # The cwd was deleted underneath us (tests, rm -rf while inside).
            start = Path.home()

    origin = start.resolve()
    marker_names = {".git", "pyproject.toml", "package.json", "Cargo.toml", "go.mod", ".deadpush"}

    # origin first, then each parent up to and including the filesystem root.
    for candidate in (origin, *origin.parents):
        for marker in marker_names:
            if (candidate / marker).exists():
                return candidate
    return start.resolve()
211
+
212
+
213
# Errors that mean "this config source is unreadable or malformed": I/O
# failures, TOML/unicode decode errors and bad numeric values (ValueError),
# and values of the wrong shape (TypeError, AttributeError, KeyError).
_CONFIG_LOAD_ERRORS = (OSError, ValueError, TypeError, AttributeError, KeyError)


def _apply_dead_code_overrides(cfg: Config, section: dict[str, Any]) -> None:
    """Copy the recognised ``dead_code`` keys from *section* onto *cfg*."""
    if "min_confidence" in section:
        cfg.dead_code.min_confidence = str(section["min_confidence"])
    if "show_uncertain" in section:
        cfg.dead_code.show_uncertain = bool(section["show_uncertain"])
    if "custom_registrations" in section:
        cfg.dead_code.custom_registrations = list(section["custom_registrations"])


def _apply_pyproject_overrides(cfg: Config, tool: dict[str, Any]) -> None:
    """Apply the ``[tool.deadpush]`` table of pyproject.toml to *cfg*."""
    if "languages" in tool:
        cfg.languages = [str(x) for x in tool["languages"]]
    if "entrypoints" in tool:
        ep = tool["entrypoints"]
        if "include" in ep:
            cfg.entrypoints.include = list(ep["include"])
        if "dynamic_patterns" in ep:
            cfg.entrypoints.dynamic_patterns = list(ep["dynamic_patterns"])
    if "ignore" in tool:
        extra = tool["ignore"]
        # A bare string would otherwise be extended one character at a time.
        cfg.ignore_patterns.extend([extra] if isinstance(extra, str) else extra)
    if "max_file_size_mb" in tool:
        cfg.max_file_size_mb = int(tool["max_file_size_mb"])
    if "control_port" in tool:
        cfg.control_port = int(tool["control_port"])
    if "dead_code" in tool:
        _apply_dead_code_overrides(cfg, tool["dead_code"])


def _apply_deadpush_toml_overrides(cfg: Config, data: dict[str, Any]) -> None:
    """Apply the contents of a standalone deadpush TOML file to *cfg*."""
    if "languages" in data:
        cfg.languages = [str(x) for x in data["languages"]]

    block_data = data.get("block", {})
    if "blocked_files" in block_data:
        cfg.block.blocked_files = list(block_data["blocked_files"])
    if "blocked_patterns" in block_data:
        cfg.block.blocked_patterns = list(block_data["blocked_patterns"])

    _apply_dead_code_overrides(cfg, data.get("dead_code", {}))

    # Note: the table is named "tests" while the Config attribute is "test".
    test_data = data.get("tests", {})
    if "command" in test_data:
        cfg.test.command = str(test_data["command"])
    if "timeout_seconds" in test_data:
        cfg.test.timeout_seconds = int(test_data["timeout_seconds"])
    if "enabled" in test_data:
        cfg.test.enabled = bool(test_data["enabled"])


def load_config(explicit_root: Path | None = None) -> Config:
    """Load config, merging file-based overrides if present.

    Sources are applied in this order, later ones winning:

    1. built-in defaults,
    2. ``[tool.deadpush]`` in ``<root>/pyproject.toml``,
    3. the standalone deadpush TOML file (deadpush.toml / .deadpush.toml /
       .deadpush/config.toml),
    4. the ``DEADPUSH_LANGUAGES`` environment variable (comma-separated).

    Loading is best effort for both files: a source that is unreadable or
    holds a malformed value is abandoned at that point. Values applied
    before the bad one are kept, the rest fall back to earlier sources.

    Args:
        explicit_root: Repository root to use. When omitted (or falsy) the
            root is auto-detected.

    Returns:
        The populated ``Config``.
    """
    root = explicit_root or _find_repo_root()

    cfg = Config(repo_root=root)

    # Try pyproject.toml [tool.deadpush]
    pyproj = root / "pyproject.toml"
    if pyproj.exists():
        try:
            data = tomllib.loads(pyproj.read_text(encoding="utf-8"))
            _apply_pyproject_overrides(cfg, data.get("tool", {}).get("deadpush", {}))
        except _CONFIG_LOAD_ERRORS:
            pass  # ignore bad toml, use defaults

    # deadpush.toml / .deadpush.toml / .deadpush/config.toml
    dpt_data = _load_deadpush_toml(root)
    if dpt_data:
        try:
            _apply_deadpush_toml_overrides(cfg, dpt_data)
        except _CONFIG_LOAD_ERRORS:
            # Same policy as pyproject.toml: a malformed value must not
            # abort loading.
            pass

    # Env var overrides for quick use
    env_languages = os.environ.get("DEADPUSH_LANGUAGES")
    if env_languages:
        cfg.languages = [x.strip() for x in env_languages.split(",") if x.strip()]

    return cfg
280
+
281
+
282
+ # Convenience for tests / direct use
283
def get_default_languages() -> list[str]:
    """Return a new list holding every entry of ``SUPPORTED_LANGUAGES``."""
    return [*SUPPORTED_LANGUAGES]
deadpush/crawler.py ADDED
@@ -0,0 +1,133 @@
1
+ """
2
+ Source file crawler for deadpush.
3
+
4
+ Discovers text source files while respecting:
5
+ - .gitignore + built-in ignores
6
+ - Config ignore patterns + language filters (indirectly via caller)
7
+ - Size limits and binary detection
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+ import stat
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+ from typing import Iterator
17
+
18
+ from .config import Config
19
+
20
+
21
+ @dataclass(frozen=True, slots=True)
22
+ class FileInfo:
23
+ """Lightweight descriptor for a discovered file passed to analyzers."""
24
+ path: Path
25
+ rel_path: Path
26
+ size: int
27
+ is_text: bool
28
+ mtime: float
29
+
30
+ @property
31
+ def suffix(self) -> str:
32
+ return self.path.suffix.lower()
33
+
34
+
35
# Byte values accepted as text: BEL, BS, TAB, LF, FF, CR, ESC and everything
# from 0x20 to 0xFF except DEL (0x7F). Built once at import time instead of
# on every call.
_TEXT_BYTES = bytes(sorted({7, 8, 9, 10, 12, 13, 27} | (set(range(0x20, 0x100)) - {0x7F})))


def _is_text_file(path: Path, max_bytes: int = 4096) -> bool:
    """Heuristically decide whether *path* holds text.

    Reads at most *max_bytes* from the start of the file and reports text
    only if every byte read is in the accepted set. NUL and the remaining
    control characters are not in that set, so they mark the file as binary.

    Args:
        path: File to inspect.
        max_bytes: Size of the prefix to examine.

    Returns:
        True for an empty file or an all-text prefix; False for a binary
        prefix or when the file cannot be opened or read.
    """
    try:
        with path.open("rb") as f:
            chunk = f.read(max_bytes)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: a path the OS
        # rejects outright (e.g. an embedded NUL character).
        return False
    # Delete every accepted byte in one pass; anything left over is a byte
    # that marks the content as binary.
    return not chunk.translate(None, _TEXT_BYTES)
47
+
48
+
49
def _iter_candidate_files(root: Path, ignore_spec) -> Iterator[Path]:
    """Walk *root* and yield each regular file that *ignore_spec* does not match.

    Ignored directories are removed from the walk before descending, so
    nothing below them is visited. Anything that is not a regular file
    according to ``lstat`` (symlinks, sockets, ...) is skipped.
    """

    def directory_is_ignored(relative: Path) -> bool:
        # Try the path both without and with a trailing slash.
        text = str(relative)
        return ignore_spec.match_file(text) or ignore_spec.match_file(text + "/")

    for current, subdirs, names in os.walk(root):
        base = Path(current)

        # os.walk honours in-place edits of the directory list.
        subdirs[:] = [
            entry
            for entry in subdirs
            if not directory_is_ignored((base / entry).relative_to(root))
        ]

        for name in names:
            candidate = base / name
            try:
                relative = candidate.relative_to(root)
            except ValueError:
                continue
            if ignore_spec.match_file(str(relative)):
                continue
            try:
                mode = candidate.lstat().st_mode
            except OSError:
                continue
            if stat.S_ISREG(mode):
                yield candidate
79
+
80
+
81
def iter_source_files(repo_root: Path, config: Config) -> list[FileInfo]:
    """
    Collect a ``FileInfo`` for every non-ignored regular file under repo_root.

    The result is not limited to code files (the debris detector wants md,
    env, etc.). Files larger than ``config.max_file_size_mb`` are still
    listed but are flagged as non-text without being read. Files whose
    ``stat`` fails are left out. The list is ordered by relative path.
    """
    spec = config.get_effective_ignore_spec()
    size_limit = config.max_file_size_mb * 1024 * 1024

    discovered: list[FileInfo] = []
    for candidate in _iter_candidate_files(repo_root, spec):
        try:
            details = candidate.stat()
            # Oversized files are never opened for the text heuristic.
            text_like = False if details.st_size > size_limit else _is_text_file(candidate)
            entry = FileInfo(
                path=candidate,
                rel_path=candidate.relative_to(repo_root),
                size=details.st_size,
                is_text=text_like,
                mtime=details.st_mtime,
            )
        except OSError:
            continue
        discovered.append(entry)

    # Stable ordering by path keeps the output deterministic.
    return sorted(discovered, key=lambda info: str(info.rel_path))
114
+
115
+
116
def get_supported_extensions(config: Config | None = None) -> set[str]:
    """Return the union of file extensions known to the language plugins.

    The plugin registry is imported inside the function to avoid a circular
    import at module load. If that import or the lookup fails, a built-in
    set of common extensions is added to the result instead. *config* is
    accepted but not consulted.
    """
    fallback = (
        ".py", ".pyi",
        ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
        ".go",
        ".rs",
        ".cpp", ".cc", ".cxx", ".hpp", ".hh", ".h", ".c",
        ".java",
    )
    collected: set[str] = set()
    try:
        from .languages import get_all_extensions
        collected.update(get_all_extensions())
    except Exception:
        collected.update(fallback)
    return collected