deadpush 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deadpush/intercept.py ADDED
@@ -0,0 +1,995 @@
1
+ """
2
+ Pre-write file interception daemon.
3
+
4
+ Agents write to .deadpush/staging/ instead of directly to the project.
5
+ The daemon watches staging, runs guardrails, and either:
6
+ - Approves: moves the file to the real project path
7
+ - Blocks: moves to quarantine + writes structured feedback the agent can read
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import atexit
13
+ import difflib
14
+ import json
15
+ import os
16
+ import re
17
+ import shutil
18
+ import sys
19
+ import time
20
+ import threading
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+ from typing import Any
24
+
25
+ from .config import Config as DeadpushConfig
26
+ from .rules import RuntimeConfig
27
+
28
+
29
# All paths below are relative to the repository root.
GUARDRAIL_DIR = ".deadpush"
# Quarantine lives beside (not inside) the guardrail directory.
QUARANTINE_DIR = f"{GUARDRAIL_DIR}-quarantine"
# Agents write candidate files here; the layout mirrors the project tree.
STAGING_DIR = f"{GUARDRAIL_DIR}/staging"
# Per-file JSON/markdown feedback is written here.
FEEDBACK_DIR = f"{GUARDRAIL_DIR}/feedback"
# JSON store of false-positive patterns taught by the agent.
LEARNED_PATTERNS_FILE = f"{GUARDRAIL_DIR}/learned_patterns.json"
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Test/mock context detection
37
+ # ---------------------------------------------------------------------------
38
+
39
# File-name stems that start or end with one of these are treated as test code.
_TEST_FILE_INDICATORS = [
    "test", "spec", "mock", "fixture", "stub", "fake", "conftest",
    "factory", "helper", "assertion", "matcher",
]

# Directory components that mark everything beneath them as test code.
_TEST_DIR_INDICATORS = [
    "/test/", "/tests/", "/spec/", "/specs/", "/__tests__/",
    "/mocks/", "/fixtures/", "/testing/",
]

# Process-wide cache of the learned-patterns file (None until first load).
_LEARNED_PATTERNS: dict[str, list[dict[str, Any]]] | None = None


def _is_test_or_mock(rel_path: str) -> bool:
    """Return True when *rel_path* looks like test/mock/fixture code.

    A path qualifies when it contains one of ``_TEST_DIR_INDICATORS`` as a
    directory component, or when its file stem starts or ends with one of
    ``_TEST_FILE_INDICATORS``. Matching is case-insensitive.
    """
    # Normalise separators and force a leading "/" so that a top-level
    # directory ("tests/util.py") or a Windows-style path ("src\\mocks\\x.py")
    # still matches the "/name/" directory markers.
    normalized = "/" + rel_path.lower().replace("\\", "/").lstrip("/")
    if any(marker in normalized for marker in _TEST_DIR_INDICATORS):
        return True
    stem = Path(normalized).stem
    markers = tuple(_TEST_FILE_INDICATORS)
    return stem.startswith(markers) or stem.endswith(markers)
63
+
64
+
65
def _load_learned_patterns(repo_root: Path) -> dict[str, list[dict[str, Any]]]:
    """Load agent-taught false positive patterns.

    The result is cached in the module-level ``_LEARNED_PATTERNS`` after the
    first call, so later calls return the cached object regardless of
    *repo_root*.

    A missing, unreadable, or malformed file yields an empty store. The
    returned dict always has a ``"patterns"`` list containing only dict
    entries and a ``"suppressed_categories"`` dict, so callers can index
    and iterate without further validation.
    """
    global _LEARNED_PATTERNS
    if _LEARNED_PATTERNS is not None:
        return _LEARNED_PATTERNS

    path = repo_root / LEARNED_PATTERNS_FILE
    try:
        loaded: Any = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError: invalid JSON or bad
        # UTF-8. Either way fall back to an empty store (best-effort load).
        loaded = None

    if not isinstance(loaded, dict):
        loaded = {}
    patterns = loaded.get("patterns")
    # Drop non-dict entries: consumers call ``entry.get(...)`` on each item.
    loaded["patterns"] = (
        [entry for entry in patterns if isinstance(entry, dict)]
        if isinstance(patterns, list) else []
    )
    if not isinstance(loaded.get("suppressed_categories"), dict):
        loaded["suppressed_categories"] = {}

    _LEARNED_PATTERNS = loaded
    return _LEARNED_PATTERNS
79
+
80
+
81
def _save_learned_patterns(repo_root: Path) -> None:
    """Write the cached learned-patterns store to disk as indented JSON.

    Does nothing when the store has not been loaded yet (cache is ``None``).
    The parent directory is created on demand.
    """
    if _LEARNED_PATTERNS is None:
        return
    target = repo_root / LEARNED_PATTERNS_FILE
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(_LEARNED_PATTERNS, indent=2)
    target.write_text(serialized, encoding="utf-8")
89
+
90
+
91
def _is_suppressed(category: str, description: str, repo_root: Path) -> bool:
    """Tell whether a violation matches a learned false-positive entry.

    An entry matches when its ``category`` equals *category* and its
    non-empty ``pattern`` occurs as a substring of *description*.
    """
    entries = _load_learned_patterns(repo_root).get("patterns", [])
    for candidate in entries:
        same_category = candidate.get("category") == category
        if same_category and candidate.get("pattern") and candidate["pattern"] in description:
            return True
    return False
100
+
101
+
102
def _learn_false_positive(category: str, pattern: str, reason: str, repo_root: Path) -> None:
    """Record a false positive pattern learned from the agent.

    If an entry with the same *category* and *pattern* already exists its
    ``count`` is incremented; otherwise a new entry with ``count`` 1 is
    appended. The store is persisted after either change.
    """
    learned = _load_learned_patterns(repo_root)
    # setdefault: a hand-edited store may lack the "patterns" key entirely;
    # indexing it directly would raise KeyError.
    entries = learned.setdefault("patterns", [])
    for existing in entries:
        if existing.get("category") == category and existing.get("pattern") == pattern:
            existing["count"] = existing.get("count", 1) + 1
            break
    else:
        # No duplicate found: record a fresh entry.
        entries.append({
            "category": category,
            "pattern": pattern,
            "reason": reason,
            "count": 1,
        })
    _save_learned_patterns(repo_root)
118
+
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # Guardrail check results
122
+ # ---------------------------------------------------------------------------
123
+
124
class Violation:
    """One guardrail finding for a staged file.

    Holds the guardrail category, a human-readable description, the line
    number (0 when not line-specific), a severity label and an optional
    uncertainty note.
    """

    # Fields that are always present in the serialized form, in output order.
    _ALWAYS_SERIALIZED = ("category", "description", "line", "severity")

    def __init__(self, category: str, description: str, line: int = 0, severity: str = "medium", uncertainty: str = ""):
        self.category, self.description = category, description
        self.line, self.severity = line, severity
        self.uncertainty = uncertainty

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; ``uncertainty`` is included only when non-empty."""
        payload: dict[str, Any] = {
            field: getattr(self, field) for field in self._ALWAYS_SERIALIZED
        }
        if self.uncertainty:
            payload["uncertainty"] = self.uncertainty
        return payload
144
+
145
+
146
class GuardrailResult:
    """Outcome of running the guardrail checks on one staged file.

    ``allowed`` starts True and flips to False on the first ``reject``.
    ``violations`` collects every reported finding (blocking or not) and
    ``diff`` holds a unified diff string (empty when none was computed).
    """

    def __init__(self):
        self.diff: str = ""
        self.violations: list[Violation] = []
        self.allowed = True

    def reject(self, v: Violation):
        """Record *v* as a blocking violation and mark the result as not allowed."""
        self.allowed = False
        self.violations.append(v)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the result, including each violation's dict form."""
        serialized_violations = []
        for violation in self.violations:
            serialized_violations.append(violation.to_dict())
        return {
            "allowed": self.allowed,
            "violations": serialized_violations,
            "diff": self.diff,
        }
164
+
165
+
166
+ # ---------------------------------------------------------------------------
167
+ # Individual guardrail checkers
168
+ # ---------------------------------------------------------------------------
169
+
170
def _check_prompt_injection(source: str, runtime: RuntimeConfig | None = None) -> list[Violation]:
    """Scan *source* line by line for prompt-injection / system-prompt remnants.

    Returns one "high" severity ``prompt_injection`` violation per
    (line, pattern) match. Nothing is reported when the runtime sets the
    guardrail level to "off", and matches the runtime explicitly allows are
    skipped. Matching is case-insensitive.
    """
    found: list[Violation] = []
    level = runtime.get_guardrail_level("prompt_injection") if runtime else "block"
    if level == "off":
        return found

    # (regex, human-readable label)
    rules = (
        (r"ignore\s+(all\s+)?(previous|prior|above)\s+instructions", "Ignore-previous-instructions attempt"),
        (r"you\s+are\s+(now|an?\s+(AI|autonomous|unconstrained|free))", "AI role-play / identity override"),
        (r"system\s+prompt", "System prompt reference"),
        (r"<\|im_start\|>|<\|im_end\|>|<\|sys\|>", "Chat markup token"),
        (r"you\s+(must|will|shall)\s+obey", "Command-style instruction to AI"),
        (r"forget\s+(all\s+)?(previous|prior)", "Forget-previous instruction attempt"),
        (r"\[\/?INST\]|\[\/?SYS\]", "LLaMA-style instruction token"),
        (r"###\s*(System|Instruction|Response)\s*:", "Section header mimicking system prompt"),
        (r"do\s+not\s+(follow|obey|listen\s+to)", "Defiance instruction"),
        (r"output\s+(only|just|exclusively)\s+(the\s+)?(JSON|code|result)", "Output constraint injection"),
    )

    for lineno, text in enumerate(source.splitlines(), 1):
        for regex, label in rules:
            hit = re.search(regex, text, re.IGNORECASE)
            if hit is None:
                continue
            if runtime and runtime.is_allowed(hit.group()):
                continue
            # Only the first 60 characters of the stripped line are quoted.
            found.append(Violation("prompt_injection", f"{label}: {text.strip()[:60]}", lineno, "high"))
    return found
197
+
198
+
199
def _check_security(source: str, runtime: RuntimeConfig | None = None, rel_path: str | None = None) -> list[Violation]:
    """Scan *source* line by line for security-sensitive operations.

    Returns one ``security`` violation per (line, pattern) match. Severity
    is lowered to "low" when *rel_path* is recognised as test/mock code.
    Nothing is reported when the runtime sets the guardrail level to "off",
    and matches the runtime explicitly allows are skipped.
    """
    found: list[Violation] = []
    level = runtime.get_guardrail_level("security") if runtime else "block"
    if level == "off":
        return found

    in_test_context = _is_test_or_mock(rel_path) if rel_path else False

    # (regex, label, base severity)
    # NOTE(review): the SQL rule is "high" only because its regex text
    # happens to contain "exec" (from "execute"), which is how severity was
    # originally derived; confirm that this is intended.
    rules = (
        (r"\b(eval|exec)\s*\(", "Dynamic code execution", "high"),
        (r"\b(subprocess\.(call|run|Popen|check_output|check_call)|os\.system|os\.popen)\s*\(", "Shell command execution", "medium"),
        (r"\b(pickle\.loads|pickle\.load|shelve\.open)\s*\(", "Unsafe deserialization", "high"),
        (r"\bexecute\s*\(\s*['\"`](SELECT|INSERT|UPDATE|DELETE|DROP|ALTER|CREATE)", "SQL query construction", "high"),
        (r"\bos\.(remove|unlink|rmdir)\s*\(", "File deletion operation", "medium"),
    )

    for lineno, text in enumerate(source.splitlines(), 1):
        for regex, label, base_severity in rules:
            hit = re.search(regex, text)
            if hit is None:
                continue
            if runtime and runtime.is_allowed(hit.group()):
                continue
            severity = "low" if in_test_context else base_severity
            found.append(Violation("security", label, lineno, severity))
    return found
225
+
226
+
227
def _check_debris_patterns(source: str, suffix: str, runtime: RuntimeConfig | None = None) -> list[Violation]:
    """Check for common AI-generated debris patterns.

    For ``.py``/``.js``/``.ts``/``.jsx``/``.tsx`` files, reports a "low"
    severity ``debris`` violation for each line that ends with the word
    ``pass``. Nothing is reported when the runtime sets the guardrail level
    to "off", and matches the runtime explicitly allows are skipped.
    """
    violations: list[Violation] = []
    level = runtime.get_guardrail_level("debris") if runtime else "warn"
    if level == "off":
        return violations

    patterns: list[tuple[str, str]] = []
    if suffix in (".py", ".js", ".ts", ".jsx", ".tsx"):
        # The leading \b keeps identifiers that merely end in "pass"
        # (e.g. "bypass", "compass") from being reported as stubs.
        patterns.append((r"\bpass\s*$", "Stub pass statement"))

    for lineno, line in enumerate(source.splitlines(), 1):
        for pattern, desc in patterns:
            m = re.search(pattern, line)
            if m is None:
                continue
            if runtime and runtime.is_allowed(m.group()):
                continue
            violations.append(Violation("debris", f"{desc}: {line.strip()[:60]}", lineno, "low"))
    return violations
245
+
246
+
247
def _check_layer_violations(source: str, rel_path: str, config: DeadpushConfig, runtime: RuntimeConfig | None = None) -> list[Violation]:
    """Report imports in *source* that break the project's layer rules.

    Delegates to ``LayerEnforcer`` from the sibling ``layers`` module. The
    check is best-effort: any exception (including a failed import of that
    module) ends the check and whatever was collected so far is returned.
    *config* is accepted for signature parity but is not read here.
    """
    found: list[Violation] = []
    level = runtime.get_guardrail_level("layer") if runtime else "block"
    if level == "off":
        return found

    try:
        from .layers import LayerEnforcer

        enforcer = LayerEnforcer()
        file_suffix = Path(rel_path).suffix
        imports_list = enforcer.extract_imports_regex(source, file_suffix)
        if imports_list:
            for layer_violation in enforcer.analyze_imports(rel_path, imports_list):
                if runtime and runtime.is_allowed(layer_violation.description):
                    continue
                found.append(
                    Violation("layer", layer_violation.description, layer_violation.line, "medium")
                )
    except Exception:
        # Deliberate best-effort: layer analysis must never break the pipeline.
        pass
    return found
267
+
268
+
269
def _check_dependency_integrity(source: str, rel_path: str, repo_root: Path | str, runtime: RuntimeConfig | None = None) -> list[Violation]:
    """Run the dependency guard on a (possibly changed) dependency file.

    Delegates to ``check_deps`` from the sibling ``deps_guard`` module,
    passing the new *source*, the path, and the file's current on-disk
    content (empty string when the file does not exist yet). The check is
    best-effort: any exception ends it and the findings collected so far
    are returned.
    """
    found: list[Violation] = []
    level = runtime.get_guardrail_level("dependency") if runtime else "warn"
    if level == "off":
        return found

    try:
        from .deps_guard import check_deps

        # Absolute paths are used as-is; relative ones are anchored at repo_root.
        if Path(rel_path).is_absolute():
            dest = Path(rel_path)
        else:
            dest = Path(repo_root) / rel_path

        previous = ""
        if dest.exists():
            previous = dest.read_text(encoding="utf-8", errors="replace")

        for finding in check_deps(source, rel_path, previous):
            if runtime and runtime.is_allowed(finding["description"]):
                continue
            found.append(
                Violation(finding["category"], finding["description"], finding["line"], finding["severity"])
            )
    except Exception:
        # Deliberate best-effort: dependency analysis must never break the pipeline.
        pass
    return found
290
+
291
+
292
def _check_hardcoded_secrets(source: str, runtime: RuntimeConfig | None = None, rel_path: str | None = None) -> list[Violation]:
    """Check for hardcoded secrets, API keys, tokens.

    Returns one ``secret`` violation per (line, pattern) match. Matching is
    case-insensitive. When *rel_path* is recognised as test/mock code,
    "high" and "critical" findings are downgraded to "low". Nothing is
    reported when the runtime sets the guardrail level to "off", and
    matches the runtime explicitly allows are skipped.
    """
    violations: list[Violation] = []
    level = runtime.get_guardrail_level("secret") if runtime else "block"
    if level == "off":
        return violations

    is_test = _is_test_or_mock(rel_path) if rel_path else False

    # (regex, label, base severity)
    patterns = [
        (r'(?:api[_-]?key|apikey|secret[_-]?key|secret[_-]?token)\s*[:=]\s*["\'].+["\']', "Hardcoded API key/secret", "high"),
        (r'(?:sk-[a-zA-Z0-9]{20,}|pk-[a-zA-Z0-9]{20,})', "Hardcoded API token (starts with sk-/pk-)", "critical"),
        (r'AKIA[0-9A-Z]{16}', "Hardcoded AWS Access Key", "critical"),
        (r'(?:password|passwd|pwd)\s*[:=]\s*["\'][^"\']{4,}["\']', "Hardcoded password", "high"),
        (r'ghp_[a-zA-Z0-9]{36}', "Hardcoded GitHub token", "critical"),
        (r'xox[baprs]-[0-9a-zA-Z-]{10,}', "Hardcoded Slack token", "critical"),
    ]

    for lineno, line in enumerate(source.splitlines(), 1):
        for pattern, desc, severity in patterns:
            m = re.search(pattern, line, re.IGNORECASE)
            if m is None:
                continue
            if runtime and runtime.is_allowed(m.group()):
                continue
            # "warn" is a guardrail *level*, not a severity; use "low" so the
            # severity vocabulary matches the other checkers
            # (low / medium / high / critical).
            effective_sev = "low" if (is_test and severity in ("high", "critical")) else severity
            violations.append(Violation("secret", f"{desc}: {line.strip()[:60]}", lineno, effective_sev))
    return violations
317
+
318
+
319
def _check_sensitive_write(source: str, rel_path: str, config: DeadpushConfig, runtime: RuntimeConfig | None = None) -> list[Violation]:
    """Flag a write whose target *config* classifies as a sensitive config file.

    Returns a single "high" severity ``sensitive`` violation, or an empty
    list when the guardrail is "off", the path is not sensitive, or the
    runtime explicitly allows the path. *source* is not inspected.
    """
    level = runtime.get_guardrail_level("sensitive") if runtime else "block"
    if level == "off":
        return []
    if not config.is_sensitive_config(rel_path):
        return []
    if runtime and runtime.is_allowed(rel_path):
        return []
    message = f"Write to sensitive config file blocked: {rel_path}"
    return [Violation("sensitive", message, 0, "high")]
334
+
335
+
336
def _check_destructive_changes(
    source: str, rel_path: str, repo_root: Path,
    runtime: RuntimeConfig | None = None,
    _old_source: str | None = None,
) -> list[Violation]:
    """Detect writes that would wipe out most of an existing file.

    Two independent heuristics are applied to the line counts of the old
    and new content (both may fire for the same write):

    * a file of more than 20 lines replaced by fewer than 3 lines;
    * a file of more than 10 lines shrunk to under half its line count.

    _old_source: optional pre-write content (used by the real-time guardian
    where the file has already been overwritten by the agent). When it is
    omitted the current on-disk file is read instead; a missing or
    unreadable file yields no findings.
    """
    level = runtime.get_guardrail_level("destructive") if runtime else "warn"
    if level == "off":
        return []

    if _old_source is None:
        dest = (repo_root / rel_path).resolve()
        if not dest.exists():
            return []
        try:
            previous = dest.read_text(encoding="utf-8", errors="ignore")
        except Exception:
            return []
    else:
        previous = _old_source

    old_count = len(previous.splitlines())
    new_count = len(source.splitlines())
    found: list[Violation] = []

    # Near-empty write to a previously substantial file.
    if old_count > 20 and new_count < 3:
        near_empty_severity = "high" if level == "block" else "medium"
        found.append(Violation(
            "destructive",
            f"Replacing {old_count}-line file with {new_count} lines — potential content deletion",
            0, near_empty_severity,
        ))

    # More than half of the lines removed.
    if old_count > 10 and new_count < old_count * 0.5:
        found.append(Violation(
            "destructive",
            f"Writing {new_count} lines to replace {old_count} lines — >50% reduction",
            0, "medium",
        ))

    return found
382
+
383
+
384
+ # ---------------------------------------------------------------------------
385
+ # Feedback writer
386
+ # ---------------------------------------------------------------------------
387
+
388
def _write_feedback(feedback_dir: Path, file_rel: str, result: GuardrailResult):
    """Persist the guardrail outcome for *file_rel* where the agent can read it.

    Two files are written into *feedback_dir* (created if needed), both
    named after *file_rel* with path separators replaced by ``__``:
    a ``.json`` file with the structured outcome and a ``.md`` file with a
    human-readable rendering.
    """
    status = "approved" if result.allowed else "blocked"
    payload = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "file": file_rel,
        "status": status,
        "acknowledged": False,
        "violations": [violation.to_dict() for violation in result.violations],
        "diff": result.diff,
        "message": _generate_message(file_rel, result),
    }

    feedback_dir.mkdir(parents=True, exist_ok=True)

    # Flatten the relative path so the feedback file name can be correlated
    # with the staged file without needing nested directories.
    flat_name = file_rel.replace("/", "__").replace("\\", "__")

    json_target = feedback_dir / f"{flat_name}.json"
    json_target.write_text(json.dumps(payload, indent=2), encoding="utf-8")

    markdown_target = feedback_dir / f"{flat_name}.md"
    markdown_target.write_text(_feedback_to_markdown(file_rel, result), encoding="utf-8")
409
+
410
+
411
def _generate_message(file_rel: str, result: GuardrailResult) -> str:
    """Build the plain-text summary stored in the feedback JSON.

    Approved results get a one-line confirmation; blocked results get a
    header, one bullet per violation, and a closing instruction.
    """
    if result.allowed:
        return f"Your change to {file_rel} was approved."

    bullets = [
        f"- {violation.description} (line {violation.line}, severity: {violation.severity})"
        for violation in result.violations
    ]
    header = f"Your change to {file_rel} was BLOCKED due to {len(result.violations)} violation(s):\n"
    footer = (
        "\n\nReview the violations above, fix your code, and try again. "
        "The previous attempt has been quarantined."
    )
    return header + "\n".join(bullets) + footer
423
+
424
+
425
def _feedback_to_markdown(file_rel: str, result: GuardrailResult) -> str:
    """Render the guardrail outcome as a markdown document.

    Sections appear in this order: header (file, status, UTC time),
    violations (if any), diff (if any), and remediation steps (only when
    the result is not allowed).
    """
    status_label = "✅ Approved" if result.allowed else "❌ Blocked"
    out = [
        "# deadpush Guardrail Feedback",
        "",
        f"**File:** `{file_rel}`",
        f"**Status:** {status_label}",
        f"**Time:** {datetime.now(timezone.utc).isoformat()}",
        "",
    ]

    if result.violations:
        out.extend(["## Violations", ""])
        for violation in result.violations:
            out.extend([
                f"### {violation.category} (severity: {violation.severity})",
                f"- **Line:** {violation.line}",
                f"- **Description:** {violation.description}",
                "",
            ])

    if result.diff:
        out.extend([
            "## Diff",
            "",
            "```diff",
            result.diff.rstrip("\n"),
            "```",
            "",
        ])

    if not result.allowed:
        out.extend([
            "## What to do",
            "",
            "1. Read each violation above carefully.",
            "2. Fix the issue in your code.",
            "3. Re-write the file to `.deadpush/staging/` for re-check.",
            "",
            "Do not ignore these guardrails — they protect the codebase from harmful patterns.",
        ])

    return "\n".join(out)
458
+
459
+
460
+ # ---------------------------------------------------------------------------
461
+ # Full guardrail check pipeline
462
+ # ---------------------------------------------------------------------------
463
+
464
def _get_file_rel(staged_path: Path, staging_dir: Path) -> str:
    """Return *staged_path* relative to *staging_dir* as a string.

    The staging tree mirrors the project layout, so this is also the
    file's project-relative path. When *staged_path* is not located under
    *staging_dir*, only its file name is returned.
    """
    try:
        relative = staged_path.relative_to(staging_dir)
    except ValueError:
        return staged_path.name
    return str(relative)
470
+
471
+
472
def _apply_guardrail_level(result: GuardrailResult, violations: list[Violation], runtime: RuntimeConfig | None, category: str) -> None:
    """Apply violations according to the guardrail level for the given category.

    block → reject (prevents write)
    warn → append (reports but allows write)
    off → already filtered by the checker, nothing to do

    When no *runtime* is given, the level falls back to the same default
    the corresponding checker uses: "warn" for the debris, dependency and
    destructive categories, "block" for everything else.
    """
    if runtime:
        level = runtime.get_guardrail_level(category)
    else:
        # Keep in sync with the checkers' own no-runtime defaults. Without
        # this, soft categories were escalated to "block", so e.g. a single
        # stub `pass` quarantined the file when no runtime was supplied.
        level = "warn" if category in ("debris", "dependency", "destructive") else "block"

    if level == "block":
        for v in violations:
            result.reject(v)
    else:
        result.violations.extend(violations)
485
+
486
+
487
def _run_guardrails(
    staged_path: Path,
    staging_dir: Path,
    config: DeadpushConfig,
    runtime: RuntimeConfig | None = None,
    *,
    _old_source: str | None = None,
    _rel_path_override: str | None = None,
) -> GuardrailResult:
    """Run all guardrail checks on a staged file.

    Args:
        staged_path: Path to the file to check.
        staging_dir: Base directory for computing the relative path
            (pass repo_root when checking a file at its real path).
        config: Deadpush configuration.
        runtime: Optional runtime config for level overrides.
        _old_source: Optional pre-write content (for real-time guardian flow).
        _rel_path_override: Optional explicit relative path (bypasses staging_dir
            computation for the real-time guardian).

    Returns:
        A GuardrailResult carrying the unified diff against the previous
        content and every violation found. ``allowed`` is False when at
        least one violation belongs to a category whose level is "block".
        If the staged file cannot be read, the result is rejected with a
        single "internal" violation.
    """
    result = GuardrailResult()

    try:
        source = staged_path.read_text(encoding="utf-8", errors="ignore")
    except Exception:
        result.reject(Violation("internal", "Could not read staged file", 0, "high"))
        return result

    rel = _rel_path_override if _rel_path_override is not None else _get_file_rel(staged_path, staging_dir)

    # Determine the previous content to diff against. None means "could not
    # be determined", in which case the diff is left empty.
    previous: str | None
    if _old_source is not None:
        previous = _old_source
    else:
        try:
            dest = _get_dest_path(staged_path, staging_dir, config.repo_root)
            previous = dest.read_text(encoding="utf-8", errors="ignore") if dest.exists() else ""
        except Exception:
            previous = None

    if previous is not None:
        result.diff = "".join(difflib.unified_diff(
            previous.splitlines(keepends=True),
            source.splitlines(keepends=True),
            fromfile=f"a/{rel}",
            tofile=f"b/{rel}",
        ))

    suffix = Path(rel).suffix.lower()

    def _drop_learned(found: list[Violation]) -> list[Violation]:
        """Remove findings that match a learned false-positive entry."""
        return [
            v for v in found
            if not _is_suppressed(v.category, v.description, config.repo_root)
        ]

    # Security checks — level-aware (with path context for test/mock lowering).
    # Learned false positives are removed BEFORE the level is applied:
    # filtering afterwards would leave ``result.allowed`` False even though
    # the offending violation had been dropped from the report.
    _apply_guardrail_level(
        result, _drop_learned(_check_prompt_injection(source, runtime)), runtime, "prompt_injection")
    _apply_guardrail_level(
        result, _drop_learned(_check_hardcoded_secrets(source, runtime, rel_path=rel)), runtime, "secret")
    _apply_guardrail_level(
        result, _drop_learned(_check_security(source, runtime, rel_path=rel)), runtime, "security")

    # Config / destructive checks
    _apply_guardrail_level(result, _check_sensitive_write(source, rel, config, runtime), runtime, "sensitive")
    destructive_level = runtime.get_guardrail_level("destructive") if runtime else "warn"
    for v in _check_destructive_changes(source, rel, config.repo_root, runtime, _old_source=_old_source):
        if destructive_level == "block":
            result.reject(v)
        else:
            result.violations.append(v)

    # Debris and layer checks
    _apply_guardrail_level(result, _check_debris_patterns(source, suffix, runtime), runtime, "debris")
    _apply_guardrail_level(result, _check_layer_violations(source, rel, config, runtime), runtime, "layer")

    # Dependency integrity check
    _apply_guardrail_level(result, _check_dependency_integrity(source, rel, config.repo_root, runtime), runtime, "dependency")

    return result
573
+
574
+
575
def _get_dest_path(staged_path: Path, staging_dir: Path, repo_root: Path) -> Path:
    """Map a staged file to its resolved location inside the project.

    The file's path relative to *staging_dir* (or just its name when it is
    not under *staging_dir*) is re-anchored at *repo_root* and resolved.
    """
    try:
        rel = str(staged_path.relative_to(staging_dir))
    except ValueError:
        rel = staged_path.name
    destination = repo_root / rel
    return destination.resolve()
579
+
580
+
581
def _approve(staged_path: Path, staging_dir: Path, repo_root: Path, feedback_dir: Path):
    """Promote a staged file into the project and record an approval.

    The file is moved to its mirrored location under *repo_root* (parent
    directories are created as needed), empty staging subdirectories are
    pruned, and "approved" feedback with no violations is written.
    """
    target = _get_dest_path(staged_path, staging_dir, repo_root)
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.move(str(staged_path), str(target))

    _clean_empty_dirs(staging_dir)

    # A fresh result is already in the allowed state; the assignment just
    # makes that explicit.
    approval = GuardrailResult()
    approval.allowed = True
    file_rel = _get_file_rel(staged_path, staging_dir)
    _write_feedback(feedback_dir, file_rel, approval)
593
+
594
+
595
def _block(staged_path: Path, staging_dir: Path, repo_root: Path, feedback_dir: Path, result: GuardrailResult):
    """Quarantine a rejected staged file and tell the agent why.

    The file is moved into the quarantine directory under a
    ``<timestamp>_<flattened-relative-path>`` name, a sibling ``.reason``
    file is written (best-effort), empty staging subdirectories are pruned,
    and the feedback files for the result are written.
    """
    quarantine_root = repo_root / QUARANTINE_DIR
    quarantine_root.mkdir(parents=True, exist_ok=True)

    rel = _get_file_rel(staged_path, staging_dir)
    flat_name = rel.replace("/", "__").replace("\\", "__")
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    quarantined = quarantine_root / f"{stamp}_{flat_name}"
    shutil.move(str(staged_path), str(quarantined))

    # Write .reason file compatible with QuarantineManager.
    # Only the first violation's description is recorded as the reason.
    if result.violations:
        reason = result.violations[0].description
    else:
        reason = "guardrail violation"
    reason_file = quarantined.with_name(quarantined.name + ".reason")
    reason_text = (
        f"Quarantined at {datetime.now()}\n"
        f"Reason: {reason}\n"
        f"Original path: {repo_root / rel}\n"
    )
    try:
        reason_file.write_text(reason_text)
    except Exception:
        # The reason file is informational; failing to write it must not
        # prevent the feedback below from being produced.
        pass

    _clean_empty_dirs(staging_dir)
    _write_feedback(feedback_dir, rel, result)
620
+
621
+
622
def _clean_empty_dirs(path: Path):
    """Remove empty subdirectories under path.

    Directories are visited bottom-up, so a chain of nested empty
    directories is removed completely in one call. *path* itself is never
    removed, and directories that still contain anything are left alone.
    """
    root = str(path)
    for dirpath, _dirnames, _filenames in os.walk(root, topdown=False):
        if dirpath == root:
            continue
        # Do not trust the walked dirnames/filenames here: with
        # topdown=False they were listed before the children were removed,
        # so a parent of just-deleted empty directories would still look
        # non-empty. os.rmdir only succeeds on a directory that is empty
        # right now, which is exactly the condition we want.
        try:
            os.rmdir(dirpath)
        except OSError:
            pass
630
+
631
+
632
+ # ---------------------------------------------------------------------------
633
+ # Watcher thread
634
+ # ---------------------------------------------------------------------------
635
+
636
try:
    from watchdog.observers import Observer
    from watchdog.events import FileSystemEventHandler

    class _StagingHandler(FileSystemEventHandler):
        """Watchdog event handler that remembers which files were touched.

        Events only record a timestamp per path; the owner polls
        ``pop_stable`` to collect files that have stopped changing.
        """

        def __init__(self, on_file_event):
            # The callback is stored but not invoked by this class.
            self.on_file_event = on_file_event
            # path -> wall-clock time of the most recent event for that path
            self._debounce: dict[Path, float] = {}
            self._lock = threading.Lock()

        def on_created(self, event):
            """Note a newly created or modified file; directory events are ignored."""
            if event.is_directory:
                return
            self._note(Path(event.src_path))

        # Modification events are handled exactly like creation events.
        on_modified = on_created

        def on_moved(self, event):
            """Note the destination of a moved file; directory events are ignored."""
            if event.is_directory:
                return
            self._note(Path(event.dest_path))

        def _note(self, path: Path):
            """Record (or refresh) the last-event time for *path*."""
            with self._lock:
                self._debounce[path] = time.time()

        def pop_stable(self, min_age: float = 0.3) -> list[Path]:
            """Return and forget paths that have been quiet for *min_age* seconds.

            A path is ready when both its mtime and its last recorded event
            are at least *min_age* seconds old. Paths that can no longer be
            stat'ed are forgotten without being returned.
            """
            now = time.time()
            ready: list[Path] = []
            with self._lock:
                # Iterate over a snapshot because entries are deleted below.
                for candidate, noted_at in list(self._debounce.items()):
                    try:
                        modified_at = candidate.stat().st_mtime
                    except OSError:
                        del self._debounce[candidate]
                        continue
                    if now - modified_at >= min_age and now - noted_at >= min_age:
                        ready.append(candidate)
                        del self._debounce[candidate]
            return ready

    WATCHDOG_AVAILABLE = True
except ImportError:
    WATCHDOG_AVAILABLE = False
682
+
683
+
684
class StagingWatcher(threading.Thread):
    """Watches .deadpush/staging/ for new files and processes them.

    Uses watchdog file system notifications when available, with a polling
    fallback. File stability is verified via mtime (not the old size-poll hack).

    Each stable file is run through the guardrails and then either approved
    (moved into the project) or blocked (moved to quarantine). A file that
    is staged again at the same path is checked again.
    """

    # A file must be unmodified for this many seconds before it is processed.
    STABILITY_SECONDS = 0.3

    def __init__(self, repo_root: Path, config: DeadpushConfig, poll_interval: float = 0.5):
        super().__init__(daemon=True)
        self.repo_root = repo_root
        self.config = config
        self.staging_dir = repo_root / STAGING_DIR
        self.feedback_dir = repo_root / FEEDBACK_DIR
        self.poll_interval = poll_interval
        self._stop_event = threading.Event()
        # staged path -> mtime of the version that was already handled.
        # Keyed by mtime (rather than a plain "seen" set) so that a file
        # rewritten at the same staging path is picked up again.
        self._processed: dict[Path, float] = {}
        self._handler: Any = None

    def run(self):
        """Thread entry point: ensure staging exists, then watch until stopped."""
        self.staging_dir.mkdir(parents=True, exist_ok=True)
        if WATCHDOG_AVAILABLE:
            self._run_with_watchdog()
        else:
            self._run_polling()

    def stop(self):
        """Ask the watch loop to exit."""
        self._stop_event.set()

    # ---- watchdog path ----

    def _run_with_watchdog(self):
        """Use watchdog Observer for instant file notifications."""
        self._handler = _StagingHandler(self._on_watchdog_event)
        observer = Observer()
        observer.schedule(self._handler, str(self.staging_dir), recursive=True)
        observer.start()
        try:
            while not self._stop_event.is_set():
                for p in self._handler.pop_stable(self.STABILITY_SECONDS):
                    self._process_file(p)
                if not self._stop_event.is_set():
                    self._stop_event.wait(0.1)
        finally:
            observer.stop()
            observer.join()

    def _on_watchdog_event(self, path: Path):
        """Called when watchdog detects a file event (already handled by _StagingHandler)."""

    # ---- polling fallback ----

    def _run_polling(self):
        """Fallback: poll staging directory periodically."""
        while not self._stop_event.is_set():
            self._scan_staging()
            if not self._stop_event.is_set():
                self._stop_event.wait(self.poll_interval)

    def _scan_staging(self):
        """Find unprocessed files in staging that pass mtime stability."""
        if not self.staging_dir.exists():
            return
        now = time.time()
        for staged_path in sorted(self.staging_dir.rglob("*")):
            if not staged_path.is_file():
                continue
            try:
                mtime = staged_path.stat().st_mtime
            except OSError:
                continue
            if self._processed.get(staged_path) == mtime:
                # This exact version was already handled.
                continue
            if now - mtime < self.STABILITY_SECONDS:
                continue
            self._process_file(staged_path)

    # ---- shared processing logic ----

    def _process_file(self, staged_path: Path):
        """Run guardrails and approve/block a single staged file."""
        if not staged_path.is_file():
            return
        try:
            mtime = staged_path.stat().st_mtime
        except OSError:
            return
        if self._processed.get(staged_path) == mtime:
            return
        self._processed[staged_path] = mtime
        rel = _get_file_rel(staged_path, self.staging_dir)

        try:
            # Skip hidden files — write feedback explaining why
            if staged_path.name.startswith("."):
                staged_path.unlink(missing_ok=True)
                result = GuardrailResult()
                result.reject(Violation("debris", "Hidden/dot-file written to staging was removed (not allowed)", 0, "low"))
                _write_feedback(self.feedback_dir, rel, result)
                return

            result = _run_guardrails(staged_path, self.staging_dir, self.config)

            if result.allowed:
                _approve(staged_path, self.staging_dir, self.repo_root, self.feedback_dir)
            else:
                _block(staged_path, self.staging_dir, self.repo_root, self.feedback_dir, result)
        finally:
            # Once the file has left staging, forget it so the table does
            # not grow without bound and a later re-stage at the same path
            # is always re-checked, even if it carries the same mtime.
            if not staged_path.exists():
                self._processed.pop(staged_path, None)
784
+
785
+
786
+ # ---------------------------------------------------------------------------
787
+ # HTTP API server (optional, for agents that prefer REST)
788
+ # ---------------------------------------------------------------------------
789
+
790
# TCP port the optional local write API listens on.
HTTP_PORT = 9876


class WriteAPIHandler:
    """Simple HTTP request handler for agent file writes."""

    def __init__(self, repo_root: Path, config: DeadpushConfig, runtime: RuntimeConfig | None = None):
        """Store the locations used for staging and feedback.

        Args:
            repo_root: Project root; staging and feedback live beneath it.
            config: Deadpush configuration passed on to the guardrails.
            runtime: Optional runtime config for guardrail level overrides.
                Defaults to None, in which case the guardrails use their
                built-in levels.
        """
        self.repo_root = repo_root
        self.config = config
        # Previously never assigned, although handle_write reads it.
        self.runtime = runtime
        self.staging_dir = repo_root / STAGING_DIR
        self.feedback_dir = repo_root / FEEDBACK_DIR

    def handle_write(self, rel_path: str, content: str) -> dict[str, Any]:
        """Handle a file write request from an agent.

        The content is written into the staging area at *rel_path*, run
        through the guardrails, and then approved or blocked.

        Returns:
            The guardrail result as a dict (``allowed``, ``violations``,
            ``diff``). A *rel_path* that is absolute or would resolve
            outside the staging area is refused with an "internal"
            violation and nothing is written.
        """
        # Resolve both sides so the containment check and the later
        # relative-path computation compare like with like.
        staging_root = self.staging_dir.resolve()
        staging_path = (staging_root / rel_path).resolve()

        # rel_path comes from an HTTP client: refuse "../" traversal and
        # absolute paths before touching the file system.
        if staging_root not in staging_path.parents:
            refused = GuardrailResult()
            refused.reject(Violation(
                "internal",
                f"Refusing to write outside the staging area: {rel_path}",
                0, "high",
            ))
            return refused.to_dict()

        staging_path.parent.mkdir(parents=True, exist_ok=True)
        staging_path.write_text(content, encoding="utf-8")

        result = _run_guardrails(staging_path, staging_root, self.config, self.runtime)

        if result.allowed:
            _approve(staging_path, staging_root, self.repo_root, self.feedback_dir)
        else:
            _block(staging_path, staging_root, self.repo_root, self.feedback_dir, result)
            # Clean up the staging file since it was already moved
            staging_path.unlink(missing_ok=True)

        return result.to_dict()
818
+
819
+
820
def _run_http_server(repo_root: Path, config: DeadpushConfig):
    """Run a minimal HTTP server for agent writes.

    Listens on 127.0.0.1:HTTP_PORT and blocks in ``serve_forever()``. The only
    route is ``POST /write`` with a JSON object body containing ``path``
    (required) and ``content`` (defaults to an empty string).

    Responses are JSON:
        200 - the write was allowed
        422 - the write was rejected by the guardrails
        400 - the request itself is malformed
        404 - any other path
        500 - processing the write raised an exception
    """
    import http.server

    api = WriteAPIHandler(repo_root, config)

    class _Handler(http.server.BaseHTTPRequestHandler):
        def do_POST(self):
            if self.path != "/write":
                self.send_response(404)
                self.end_headers()
                return

            # Everything wrong with the request itself is the client's fault
            # and is answered with 400 rather than 500.
            try:
                content_length = int(self.headers.get("Content-Length", 0))
                if content_length < 0:
                    raise ValueError("negative Content-Length")
                # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
                data = json.loads(self.rfile.read(content_length))
            except ValueError:
                self._json(400, {"error": "body must be valid JSON with a numeric Content-Length"})
                return

            if not isinstance(data, dict):
                self._json(400, {"error": "body must be a JSON object"})
                return

            rel_path = data.get("path", "")
            content = data.get("content", "")
            if not rel_path:
                self._json(400, {"error": "path is required"})
                return
            if not isinstance(rel_path, str) or not isinstance(content, str):
                self._json(400, {"error": "path and content must be strings"})
                return

            try:
                result = api.handle_write(rel_path, content)
                self._json(200 if result.get("allowed") else 422, result)
            except Exception as e:
                self._json(500, {"error": str(e)})

        def _json(self, status: int, data: dict):
            # Serialize before sending the status line so a payload that cannot
            # be encoded never leaves a half-written response behind.
            payload = json.dumps(data).encode("utf-8")
            self.send_response(status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Content-Length", str(len(payload)))
            self.end_headers()
            self.wfile.write(payload)

        def log_message(self, fmt, *args):
            pass  # quiet

    # The context manager closes the listening socket when serving stops.
    with http.server.HTTPServer(("127.0.0.1", HTTP_PORT), _Handler) as server:
        server.serve_forever()
859
+
860
+
861
+ # ---------------------------------------------------------------------------
862
+ # Public API
863
+ # ---------------------------------------------------------------------------
864
+
865
class InterceptDaemon:
    """Pre-write file interception daemon.

    Watches .deadpush/staging/ for agent writes. Runs guardrails on each file.
    Approves safe files (moves to project root) or blocks dangerous ones
    (quarantines + writes structured feedback).
    """

    def __init__(self, repo_root: str | Path, config: DeadpushConfig | None = None):
        self.repo_root = Path(repo_root).resolve()
        self.config = config or DeadpushConfig(repo_root=self.repo_root)
        # Optional runtime configuration forwarded to the guardrails by write_file().
        self.runtime: RuntimeConfig | None = None
        self.staging_dir = self.repo_root / STAGING_DIR
        self.feedback_dir = self.repo_root / FEEDBACK_DIR
        self.watcher: StagingWatcher | None = None
        self.http_thread: threading.Thread | None = None

    def start(self, http: bool = False):
        """Start the staging watcher (and optionally the HTTP API).

        Creates the staging directory if needed. When ``http`` is true the
        HTTP server runs in a daemon thread.
        """
        self.staging_dir.mkdir(parents=True, exist_ok=True)

        self.watcher = StagingWatcher(self.repo_root, self.config)
        self.watcher.start()

        if http:
            self.http_thread = threading.Thread(
                target=_run_http_server,
                args=(self.repo_root, self.config),
                daemon=True,
            )
            self.http_thread.start()

    def stop(self):
        """Stop the interception daemon.

        Only the watcher is stopped here; the HTTP thread, if any, is a
        daemon thread and is left running.
        """
        if self.watcher:
            self.watcher.stop()

    def write_file(self, rel_path: str, content: str) -> GuardrailResult:
        """Write a file through the interception pipeline (bypass staging dir).

        Agents can call this directly for inline writes.

        Args:
            rel_path: Destination path, relative to the staging directory.
            content: Text to write (encoded as UTF-8).

        Returns:
            The guardrail result for the written file.

        Raises:
            ValueError: If ``rel_path`` does not resolve to a location
                strictly inside the staging directory (``..`` segments,
                absolute paths, or an empty path).
        """
        staging_root = self.staging_dir.resolve()
        staging_path = (self.staging_dir / rel_path).resolve()
        # The guardrails only run after the file is on disk, so a path that
        # leaves the staging directory must be refused before writing.
        if staging_root not in staging_path.parents:
            raise ValueError(f"path escapes the staging directory: {rel_path!r}")

        staging_path.parent.mkdir(parents=True, exist_ok=True)
        staging_path.write_text(content, encoding="utf-8")

        result = _run_guardrails(staging_path, self.staging_dir, self.config, self.runtime)

        if result.allowed:
            _approve(staging_path, self.staging_dir, self.repo_root, self.feedback_dir)
        else:
            _block(staging_path, self.staging_dir, self.repo_root, self.feedback_dir, result)
        # Best-effort cleanup; a no-op when the file was already moved away.
        staging_path.unlink(missing_ok=True)

        return result
920
+
921
+
922
+ # ---------------------------------------------------------------------------
923
+ # CLI entry point (mirrors guard.run_guardian pattern)
924
+ # ---------------------------------------------------------------------------
925
+
926
def run_intercept(daemon: bool = False, http: bool = False):
    """Start the intercept daemon (foreground or daemon mode).

    Args:
        daemon: Detach from the terminal with a double fork before running.
        http: Also start the local HTTP write API.

    Returns early, after logging, when another instance is already running,
    when the lock cannot be acquired, or when daemonizing fails. Otherwise it
    blocks until interrupted (Ctrl-C or SIGTERM), then stops the watcher and
    calls ``DaemonManager.cleanup()``.
    """
    import signal

    from .config import load_config
    from .guard import DaemonManager, setup_logging

    config = load_config()
    logger = setup_logging(daemon=daemon)

    pid_dir = Path.home() / ".deadpush"
    pid_dir.mkdir(parents=True, exist_ok=True)
    pidfile = pid_dir / "intercept.pid"
    lockfile = pid_dir / "intercept.lock"

    daemon_mgr = DaemonManager(pidfile, lockfile)

    if daemon_mgr.is_running():
        logger.warning("Intercept daemon is already running. Use `deadpush intercept --stop` first.")
        return

    if not daemon_mgr.acquire_lock():
        logger.error("Could not acquire lock. Another instance may be running.")
        return

    staging_dir = config.repo_root / STAGING_DIR
    feedback_dir = config.repo_root / FEEDBACK_DIR
    # Report the port the server actually binds instead of repeating the literal.
    http_status = f"enabled on :{HTTP_PORT}" if http else "disabled"

    logger.info("Starting intercept daemon")
    logger.info(f" Staging: {staging_dir}")
    logger.info(f" Feedback: {feedback_dir}")
    logger.info(f" HTTP API: {http_status}")

    if daemon:
        logger.info("Starting in DAEMON mode...")
        try:
            # Double fork: the first child becomes a session leader, the
            # second can no longer acquire a controlling terminal. Both
            # parents exit via SystemExit, which `except Exception` below
            # deliberately does not catch.
            if os.fork() > 0:
                sys.exit(0)
            os.setsid()
            if os.fork() > 0:
                sys.exit(0)
            os.chdir("/")
            # NOTE(review): umask 0 masks no permission bits for files and
            # directories the daemon creates -- confirm this is intended.
            os.umask(0)
            try:
                sys.stdout.flush()
                sys.stderr.flush()
            except Exception:
                # Flushing is best effort; the streams are replaced right after.
                pass
            with open(os.devnull, "w") as devnull:
                os.dup2(devnull.fileno(), sys.stdout.fileno())
                os.dup2(devnull.fileno(), sys.stderr.fileno())
        except Exception as e:
            logger.error(f"Daemon fork failed: {e}")
            daemon_mgr.cleanup()
            return

    daemon_mgr.write_pid()
    atexit.register(daemon_mgr.cleanup)

    def _on_sigterm(signum, frame):
        # Unwind normally so the `finally` block and atexit hooks run; with the
        # default SIGTERM action the process dies without any cleanup.
        raise SystemExit(0)

    try:
        # Leave alone any handler that something else already installed.
        if signal.getsignal(signal.SIGTERM) == signal.SIG_DFL:
            signal.signal(signal.SIGTERM, _on_sigterm)
    except ValueError:
        # signal.signal() is only allowed in the main thread; skip otherwise.
        pass

    intercept = InterceptDaemon(config.repo_root, config)
    intercept.start(http=http)
    logger.info(f"Intercept daemon ready (PID {os.getpid()})")

    try:
        # The watcher (and HTTP server) run in background threads; this
        # thread only keeps the process alive.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        pass
    finally:
        intercept.stop()
        daemon_mgr.cleanup()
        logger.info("Intercept daemon stopped.")