codex-autorunner 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. codex_autorunner/bootstrap.py +26 -5
  2. codex_autorunner/core/config.py +176 -59
  3. codex_autorunner/core/filesystem.py +24 -0
  4. codex_autorunner/core/flows/controller.py +50 -12
  5. codex_autorunner/core/flows/runtime.py +8 -3
  6. codex_autorunner/core/hub.py +293 -16
  7. codex_autorunner/core/lifecycle_events.py +44 -5
  8. codex_autorunner/core/pma_delivery.py +81 -0
  9. codex_autorunner/core/pma_dispatches.py +224 -0
  10. codex_autorunner/core/pma_lane_worker.py +122 -0
  11. codex_autorunner/core/pma_queue.py +167 -18
  12. codex_autorunner/core/pma_reactive.py +91 -0
  13. codex_autorunner/core/pma_safety.py +58 -0
  14. codex_autorunner/core/pma_sink.py +104 -0
  15. codex_autorunner/core/pma_transcripts.py +183 -0
  16. codex_autorunner/core/safe_paths.py +117 -0
  17. codex_autorunner/housekeeping.py +77 -23
  18. codex_autorunner/integrations/agents/codex_backend.py +18 -12
  19. codex_autorunner/integrations/agents/wiring.py +2 -0
  20. codex_autorunner/integrations/app_server/client.py +31 -0
  21. codex_autorunner/integrations/app_server/supervisor.py +3 -0
  22. codex_autorunner/integrations/telegram/constants.py +1 -1
  23. codex_autorunner/integrations/telegram/handlers/commands/execution.py +16 -15
  24. codex_autorunner/integrations/telegram/handlers/commands/files.py +5 -8
  25. codex_autorunner/integrations/telegram/handlers/commands/github.py +10 -6
  26. codex_autorunner/integrations/telegram/handlers/commands/shared.py +9 -8
  27. codex_autorunner/integrations/telegram/handlers/commands/workspace.py +85 -2
  28. codex_autorunner/integrations/telegram/handlers/commands_runtime.py +29 -8
  29. codex_autorunner/integrations/telegram/helpers.py +30 -2
  30. codex_autorunner/integrations/telegram/ticket_flow_bridge.py +54 -3
  31. codex_autorunner/static/docChatCore.js +2 -0
  32. codex_autorunner/static/hub.js +59 -0
  33. codex_autorunner/static/index.html +70 -54
  34. codex_autorunner/static/notificationBell.js +173 -0
  35. codex_autorunner/static/notifications.js +154 -36
  36. codex_autorunner/static/pma.js +96 -35
  37. codex_autorunner/static/styles.css +415 -4
  38. codex_autorunner/static/utils.js +5 -1
  39. codex_autorunner/surfaces/cli/cli.py +206 -129
  40. codex_autorunner/surfaces/cli/template_repos.py +157 -0
  41. codex_autorunner/surfaces/web/app.py +193 -5
  42. codex_autorunner/surfaces/web/routes/file_chat.py +109 -61
  43. codex_autorunner/surfaces/web/routes/flows.py +125 -67
  44. codex_autorunner/surfaces/web/routes/pma.py +638 -57
  45. codex_autorunner/tickets/agent_pool.py +6 -1
  46. codex_autorunner/tickets/outbox.py +27 -14
  47. codex_autorunner/tickets/replies.py +4 -10
  48. codex_autorunner/tickets/runner.py +1 -0
  49. codex_autorunner/workspace/paths.py +8 -3
  50. {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/METADATA +1 -1
  51. {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/RECORD +55 -45
  52. {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/WHEEL +0 -0
  53. {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/entry_points.txt +0 -0
  54. {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/licenses/LICENSE +0 -0
  55. {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/top_level.txt +0 -0
codex_autorunner/core/pma_safety.py
@@ -141,6 +141,45 @@ class PmaSafetyChecker:
 
         return SafetyCheckResult(allowed=True)
 
+    def check_reactive_turn(self, *, key: str = "reactive") -> SafetyCheckResult:
+        if self._is_circuit_breaker_active():
+            return SafetyCheckResult(
+                allowed=False,
+                reason="circuit_breaker_active",
+                details={
+                    "cooldown_remaining_seconds": (
+                        int(
+                            self._circuit_breaker_until
+                            - datetime.now(timezone.utc).timestamp()
+                        )
+                        if self._circuit_breaker_until
+                        else 0
+                    )
+                },
+            )
+
+        if self._config.enable_rate_limit:
+            now = datetime.now(timezone.utc).timestamp()
+            self._action_timestamps[key] = [
+                ts
+                for ts in self._action_timestamps[key]
+                if now - ts < self._config.rate_limit_window_seconds
+            ]
+            if len(self._action_timestamps[key]) >= self._config.max_actions_per_window:
+                return SafetyCheckResult(
+                    allowed=False,
+                    reason="rate_limit_exceeded",
+                    details={
+                        "key": key,
+                        "count": len(self._action_timestamps[key]),
+                        "max_allowed": self._config.max_actions_per_window,
+                        "window_seconds": self._config.rate_limit_window_seconds,
+                    },
+                )
+            self._action_timestamps[key].append(now)
+
+        return SafetyCheckResult(allowed=True)
+
     def record_chat_result(
         self,
         agent: str,
@@ -159,6 +198,25 @@ class PmaSafetyChecker:
         key = f"chat:{agent}"
         self._failure_counts[key] = 0
 
+    def record_reactive_result(
+        self,
+        *,
+        status: str,
+        error: Optional[str] = None,
+        key: str = "reactive",
+    ) -> None:
+        if (
+            status in ("error", "failed", "interrupted")
+            and self._config.enable_circuit_breaker
+        ):
+            self._failure_counts[key] += 1
+            if self._failure_counts[key] >= self._config.circuit_breaker_threshold:
+                self._activate_circuit_breaker()
+            if error:
+                logger.warning("PMA reactive circuit breaker error: %s", error)
+        else:
+            self._failure_counts[key] = 0
+
     def record_action(
         self,
         action_type: PmaActionType,
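
The two new hooks are intended to be used as a pair around each reactive turn: gate with check_reactive_turn(), then report the outcome with record_reactive_result() so consecutive failures feed the circuit breaker. A minimal caller sketch, assuming a PmaSafetyChecker instance and a hypothetical run_turn() callable (everything except the checker methods is illustrative):

    # Hypothetical wiring; only the checker methods come from the diff above.
    def run_reactive_turn(checker, run_turn) -> None:
        check = checker.check_reactive_turn()  # rate-limit + circuit-breaker gate
        if not check.allowed:
            print(f"reactive turn skipped: {check.reason} ({check.details})")
            return
        try:
            run_turn()
        except Exception as exc:
            # Counts toward the circuit-breaker threshold.
            checker.record_reactive_result(status="error", error=str(exc))
            raise
        # Any non-failure status resets the failure count for the key.
        checker.record_reactive_result(status="completed")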
codex_autorunner/core/pma_sink.py
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import Any, Optional
+
+from .locks import file_lock
+from .time_utils import now_iso
+from .utils import atomic_write
+
+PMA_ACTIVE_SINK_FILENAME = "active_sink.json"
+
+logger = logging.getLogger(__name__)
+
+
+class PmaActiveSinkStore:
+    def __init__(self, hub_root: Path) -> None:
+        self._path = hub_root / ".codex-autorunner" / "pma" / PMA_ACTIVE_SINK_FILENAME
+
+    def _lock_path(self) -> Path:
+        return self._path.with_suffix(self._path.suffix + ".lock")
+
+    def load(self) -> Optional[dict[str, Any]]:
+        with file_lock(self._lock_path()):
+            return self._load_unlocked()
+
+    def set_web(self) -> dict[str, Any]:
+        payload = {
+            "version": 1,
+            "kind": "web",
+            "updated_at": now_iso(),
+            "last_delivery_turn_id": None,
+        }
+        with file_lock(self._lock_path()):
+            self._save_unlocked(payload)
+        return payload
+
+    def set_telegram(
+        self,
+        *,
+        chat_id: int,
+        thread_id: Optional[int],
+        topic_key: Optional[str] = None,
+    ) -> dict[str, Any]:
+        payload: dict[str, Any] = {
+            "version": 1,
+            "kind": "telegram",
+            "chat_id": int(chat_id),
+            "thread_id": int(thread_id) if thread_id is not None else None,
+            "updated_at": now_iso(),
+            "last_delivery_turn_id": None,
+        }
+        if topic_key:
+            payload["topic_key"] = topic_key
+        with file_lock(self._lock_path()):
+            self._save_unlocked(payload)
+        return payload
+
+    def clear(self) -> None:
+        with file_lock(self._lock_path()):
+            try:
+                self._path.unlink()
+            except FileNotFoundError:
+                return
+            except OSError as exc:
+                logger.warning("Failed to clear PMA active sink: %s", exc)
+
+    def mark_delivered(self, turn_id: str) -> bool:
+        if not isinstance(turn_id, str) or not turn_id:
+            return False
+        with file_lock(self._lock_path()):
+            payload = self._load_unlocked()
+            if not isinstance(payload, dict):
+                return False
+            if payload.get("last_delivery_turn_id") == turn_id:
+                return False
+            payload["last_delivery_turn_id"] = turn_id
+            payload["updated_at"] = now_iso()
+            self._save_unlocked(payload)
+            return True
+
+    def _load_unlocked(self) -> Optional[dict[str, Any]]:
+        if not self._path.exists():
+            return None
+        try:
+            raw = self._path.read_text(encoding="utf-8")
+        except OSError as exc:
+            logger.warning("Failed to read PMA active sink: %s", exc)
+            return None
+        try:
+            payload = json.loads(raw)
+        except json.JSONDecodeError:
+            return None
+        if not isinstance(payload, dict):
+            return None
+        return payload
+
+    def _save_unlocked(self, payload: dict[str, Any]) -> None:
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        atomic_write(self._path, json.dumps(payload, indent=2) + "\n")
+
+
+__all__ = ["PmaActiveSinkStore", "PMA_ACTIVE_SINK_FILENAME"]
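
PmaActiveSinkStore keeps a single JSON file at <hub>/.codex-autorunner/pma/active_sink.json recording which surface (web or a Telegram chat) should receive PMA deliveries, guarded by a sibling .lock file, with mark_delivered() deduplicating by turn id. A usage sketch, assuming a hub root on disk (the path, chat id, and turn ids are illustrative):

    from pathlib import Path

    from codex_autorunner.core.pma_sink import PmaActiveSinkStore

    store = PmaActiveSinkStore(Path("/srv/hub"))
    store.set_telegram(chat_id=123456, thread_id=None, topic_key="pma")
    assert store.mark_delivered("turn-1") is True   # first delivery recorded
    assert store.mark_delivered("turn-1") is False  # duplicate suppressed
    sink = store.load()                             # {"kind": "telegram", ...}
    store.clear()                                   # back to no active sink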
codex_autorunner/core/pma_transcripts.py
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional
+
+from .time_utils import now_iso
+from .utils import atomic_write
+
+logger = logging.getLogger(__name__)
+
+PMA_TRANSCRIPTS_DIRNAME = "transcripts"
+PMA_TRANSCRIPT_VERSION = 1
+PMA_TRANSCRIPT_PREVIEW_CHARS = 400
+
+
+def default_pma_transcripts_dir(hub_root: Path) -> Path:
+    return hub_root / ".codex-autorunner" / "pma" / PMA_TRANSCRIPTS_DIRNAME
+
+
+def _safe_segment(value: str) -> str:
+    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "-", (value or "").strip())
+    cleaned = cleaned.strip("-._")
+    if not cleaned:
+        return "unknown"
+    return cleaned[:120]
+
+
+def _stamp_now() -> str:
+    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+
+
+def _read_preview(path: Path) -> str:
+    if not path.exists():
+        return ""
+    try:
+        with open(path, "r", encoding="utf-8") as handle:
+            text = handle.read(PMA_TRANSCRIPT_PREVIEW_CHARS + 1)
+    except OSError as exc:
+        logger.warning("Failed to read PMA transcript content at %s: %s", path, exc)
+        return ""
+    text = text.strip()
+    if len(text) <= PMA_TRANSCRIPT_PREVIEW_CHARS:
+        return text
+    return text[:PMA_TRANSCRIPT_PREVIEW_CHARS].rstrip() + "..."
+
+
+@dataclass(frozen=True)
+class PmaTranscriptPointer:
+    turn_id: str
+    metadata_path: str
+    content_path: str
+    created_at: str
+
+
+class PmaTranscriptStore:
+    def __init__(self, hub_root: Path) -> None:
+        self._root = hub_root
+        self._dir = default_pma_transcripts_dir(hub_root)
+
+    @property
+    def dir(self) -> Path:
+        return self._dir
+
+    def write_transcript(
+        self,
+        *,
+        turn_id: str,
+        metadata: dict[str, Any],
+        assistant_text: str,
+    ) -> PmaTranscriptPointer:
+        safe_turn_id = _safe_segment(turn_id)
+        stamp = _stamp_now()
+        base = f"{stamp}_{safe_turn_id}"
+        json_path = self._dir / f"{base}.json"
+        md_path = self._dir / f"{base}.md"
+
+        payload = dict(metadata)
+        payload.setdefault("version", PMA_TRANSCRIPT_VERSION)
+        payload.setdefault("turn_id", turn_id)
+        payload.setdefault("created_at", now_iso())
+        payload["metadata_path"] = str(json_path)
+        payload["content_path"] = str(md_path)
+        payload["assistant_text_chars"] = len(assistant_text or "")
+
+        self._dir.mkdir(parents=True, exist_ok=True)
+        atomic_write(md_path, (assistant_text or "") + "\n")
+        atomic_write(json_path, json.dumps(payload, indent=2) + "\n")
+
+        return PmaTranscriptPointer(
+            turn_id=turn_id,
+            metadata_path=str(json_path),
+            content_path=str(md_path),
+            created_at=payload["created_at"],
+        )
+
+    def list_recent(self, *, limit: int = 50) -> list[dict[str, Any]]:
+        if limit <= 0:
+            return []
+        if not self._dir.exists():
+            return []
+        entries: list[dict[str, Any]] = []
+        for path in sorted(self._dir.glob("*.json"), reverse=True):
+            try:
+                raw = path.read_text(encoding="utf-8")
+                data = json.loads(raw)
+            except (OSError, json.JSONDecodeError) as exc:
+                logger.warning(
+                    "Failed to read PMA transcript metadata at %s: %s", path, exc
+                )
+                continue
+            if not isinstance(data, dict):
+                continue
+            content_path = Path(str(data.get("content_path") or ""))
+            if not content_path.is_absolute():
+                content_path = (path.parent / content_path).resolve()
+            data = dict(data)
+            data["preview"] = _read_preview(content_path)
+            entries.append(data)
+            if len(entries) >= limit:
+                break
+        return entries
+
+    def read_transcript(self, turn_id: str) -> Optional[dict[str, Any]]:
+        match = self._find_metadata(turn_id)
+        if not match:
+            return None
+        meta, meta_path = match
+        content_path = Path(str(meta.get("content_path") or ""))
+        if not content_path.is_absolute():
+            content_path = (meta_path.parent / content_path).resolve()
+        try:
+            content = content_path.read_text(encoding="utf-8")
+        except OSError as exc:
+            logger.warning(
+                "Failed to read PMA transcript content at %s: %s", content_path, exc
+            )
+            content = ""
+        return {"metadata": meta, "content": content}
+
+    def _find_metadata(self, turn_id: str) -> Optional[tuple[dict[str, Any], Path]]:
+        if not self._dir.exists():
+            return None
+        safe_turn_id = _safe_segment(turn_id)
+        candidates = sorted(self._dir.glob(f"*_{safe_turn_id}.json"), reverse=True)
+        for path in candidates:
+            meta = self._read_metadata(path)
+            if meta and str(meta.get("turn_id")) == turn_id:
+                return meta, path
+        if candidates:
+            meta = self._read_metadata(candidates[0])
+            if meta:
+                return meta, candidates[0]
+        for path in sorted(self._dir.glob("*.json"), reverse=True):
+            meta = self._read_metadata(path)
+            if meta and str(meta.get("turn_id")) == turn_id:
+                return meta, path
+        return None
+
+    def _read_metadata(self, path: Path) -> Optional[dict[str, Any]]:
+        try:
+            raw = path.read_text(encoding="utf-8")
+            data = json.loads(raw)
+        except (OSError, json.JSONDecodeError) as exc:
+            logger.warning(
+                "Failed to read PMA transcript metadata at %s: %s", path, exc
+            )
+            return None
+        return data if isinstance(data, dict) else None
+
+
+__all__ = [
+    "PMA_TRANSCRIPTS_DIRNAME",
+    "PMA_TRANSCRIPT_PREVIEW_CHARS",
+    "PMA_TRANSCRIPT_VERSION",
+    "PmaTranscriptPointer",
+    "PmaTranscriptStore",
+    "default_pma_transcripts_dir",
+]
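
Each transcript lands as a Markdown content file plus a JSON metadata pointer, both named <UTC stamp>_<sanitized turn id> under <hub>/.codex-autorunner/pma/transcripts/. A round-trip sketch, assuming a hub root on disk (the metadata fields are illustrative):

    from pathlib import Path

    from codex_autorunner.core.pma_transcripts import PmaTranscriptStore

    store = PmaTranscriptStore(Path("/srv/hub"))
    pointer = store.write_transcript(
        turn_id="turn-42",
        metadata={"agent": "codex"},           # illustrative fields
        assistant_text="Summary of the turn.",
    )
    recent = store.list_recent(limit=10)       # newest first, each with "preview"
    full = store.read_transcript("turn-42")    # {"metadata": ..., "content": ...}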
codex_autorunner/core/safe_paths.py
@@ -0,0 +1,117 @@
+"""Safe path validation utilities for web endpoints.
+
+This module provides utilities for validating user-controlled paths to prevent
+directory traversal attacks and other path-based security issues.
+"""
+
+from pathlib import PurePosixPath
+from typing import Optional
+
+
+class SafePathError(Exception):
+    """Raised when a path fails safety validation."""
+
+    def __init__(self, message: str, path: Optional[str] = None) -> None:
+        super().__init__(message)
+        self.path = path
+
+
+def validate_relative_posix_path(raw: str) -> PurePosixPath:
+    """Validate a user-provided path string and return a PurePosixPath.
+
+    This function validates that:
+    1. The path is not absolute
+    2. The path does not contain '..' segments (parent directory traversal)
+    3. The path does not contain backslashes (Windows separators)
+    4. The path is not empty, '.', or only slashes
+
+    Args:
+        raw: The user-provided path string (typically from a URL path parameter)
+
+    Returns:
+        A validated PurePosixPath object
+
+    Raises:
+        SafePathError: If the path fails validation
+
+    Examples:
+        >>> validate_relative_posix_path("file.txt")
+        PurePosixPath('file.txt')
+
+        >>> validate_relative_posix_path("a/b/c.txt")
+        PurePosixPath('a/b/c.txt')
+
+        >>> validate_relative_posix_path("../etc/passwd")
+        SafePathError: Invalid path: '..' not allowed
+
+        >>> validate_relative_posix_path("/etc/passwd")
+        SafePathError: Absolute paths not allowed
+    """
+    if not raw or raw.strip() == "" or raw == ".":
+        raise SafePathError("Invalid path: empty or '.'", path=raw)
+
+    # Reject backslashes early (Windows separators)
+    if "\\" in raw:
+        raise SafePathError("Invalid path: backslashes not allowed", path=raw)
+
+    # Reject '..' in the raw path before PurePosixPath normalizes it
+    # We need to check the raw string because PurePosixPath("a/../b")
+    # normalizes to "b", which would bypass the later parts check
+    if ".." in raw:
+        raise SafePathError("Invalid path: '..' not allowed", path=raw)
+
+    # Parse with PurePosixPath to ensure POSIX semantics
+    try:
+        file_rel = PurePosixPath(raw)
+    except Exception as exc:
+        raise SafePathError(f"Invalid path: {exc}", path=raw) from exc
+
+    # Reject absolute paths
+    if file_rel.is_absolute():
+        raise SafePathError("Absolute paths not allowed", path=raw)
+
+    # Double-check '..' traversal segments after parsing (for edge cases)
+    if ".." in file_rel.parts:
+        raise SafePathError("Invalid path: '..' not allowed", path=raw)
+
+    return file_rel
+
+
+def validate_single_filename(raw: str) -> str:
+    """Validate that a path string represents only a single filename (no subpaths).
+
+    This is a stricter version of validate_relative_posix_path that only allows
+    a single filename component, not subdirectories.
+
+    Args:
+        raw: The user-provided path string
+
+    Returns:
+        The validated filename
+
+    Raises:
+        SafePathError: If the path contains slashes or is otherwise invalid
+
+    Examples:
+        >>> validate_single_filename("file.txt")
+        'file.txt'
+
+        >>> validate_single_filename("a/b.txt")
+        SafePathError: Subpaths not allowed: only single filenames permitted
+
+        >>> validate_single_filename("../etc/passwd")
+        SafePathError: Invalid path: '..' not allowed
+    """
+    file_rel = validate_relative_posix_path(raw)
+
+    # Ensure only a single component (no subpaths)
+    if len(file_rel.parts) != 1:
+        raise SafePathError(
+            "Subpaths not allowed: only single filenames permitted", path=raw
+        )
+
+    # Return the string representation of the filename
+    return str(file_rel)
+
+
+__all__ = ["SafePathError", "validate_relative_posix_path", "validate_single_filename"]
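
The intended call pattern is validate first, then join the result against a trusted base directory. A short behavior sketch using only the functions defined above:

    from codex_autorunner.core.safe_paths import (
        SafePathError,
        validate_relative_posix_path,
        validate_single_filename,
    )

    validate_relative_posix_path("docs/notes.md")  # PurePosixPath('docs/notes.md')
    validate_single_filename("notes.md")           # 'notes.md'

    for bad in ("/etc/passwd", "../secret", "a\\b", "a/../b", "dir/file.txt"):
        try:
            validate_single_filename(bad)
        except SafePathError as exc:
            print(bad, "->", exc)                  # each variant is rejected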
codex_autorunner/housekeeping.py
@@ -8,6 +8,8 @@ from collections import deque
 from pathlib import Path
 from typing import Any, Iterable, Optional, Protocol, cast
 
+_MAX_ERROR_SAMPLES = 5
+
 
 @dataclasses.dataclass(frozen=True)
 class HousekeepingRule:
@@ -42,6 +44,7 @@ class HousekeepingRuleResult:
     deleted_bytes: int = 0
     truncated_bytes: int = 0
     errors: int = 0
+    error_samples: list[str] = dataclasses.field(default_factory=list)
     duration_ms: int = 0
 
 
@@ -136,21 +139,26 @@ def run_housekeeping_once(
             continue
         results.append(result)
         if logger is not None:
+            log_fields: dict[str, Any] = {
+                "name": result.name,
+                "kind": result.kind,
+                "scanned_count": result.scanned_count,
+                "eligible_count": result.eligible_count,
+                "deleted_count": result.deleted_count,
+                "deleted_bytes": result.deleted_bytes,
+                "truncated_bytes": result.truncated_bytes,
+                "errors": result.errors,
+                "duration_ms": result.duration_ms,
+                "dry_run": config.dry_run,
+                "root": str(root),
+            }
+            if result.errors > 0 and result.error_samples:
+                log_fields["error_samples"] = result.error_samples
             _log_event(
                 logger,
                 logging.INFO,
                 "housekeeping.rule",
-                name=result.name,
-                kind=result.kind,
-                scanned_count=result.scanned_count,
-                eligible_count=result.eligible_count,
-                deleted_count=result.deleted_count,
-                deleted_bytes=result.deleted_bytes,
-                truncated_bytes=result.truncated_bytes,
-                errors=result.errors,
-                duration_ms=result.duration_ms,
-                dry_run=config.dry_run,
-                root=str(root),
+                **log_fields,
             )
     if logger is not None:
         _log_event(
@@ -174,7 +182,7 @@ def _apply_directory_rule(
         return result
     now = time.time()
    min_age = max(config.min_file_age_seconds, 0)
-    files = _collect_files(base, rule)
+    files = _collect_files(base, rule, result)
     result.scanned_count = len(files)
     if not files:
         result.duration_ms = int((time.monotonic() - start) * 1000)
@@ -192,8 +200,9 @@
         if not config.dry_run:
             try:
                 entry.path.unlink()
-            except OSError:
+            except OSError as e:
                 errors += 1
+                _add_error_sample(result, "unlink", entry.path, e)
                 return
         deleted.add(entry.path)
         deleted_bytes += entry.size
@@ -254,8 +263,9 @@
         return result
     try:
         stat = path.stat()
-    except OSError:
+    except OSError as e:
         result.errors = 1
+        _add_error_sample(result, "stat", path, e)
         return result
     if not path.is_file():
         return result
@@ -271,6 +281,7 @@
             path,
             rule.max_lines,
             dry_run=config.dry_run,
+            result=result,
         )
         result.truncated_bytes += truncated
     if rule.max_bytes is not None:
@@ -278,13 +289,16 @@
             path,
             rule.max_bytes,
             dry_run=config.dry_run,
+            result=result,
        )
         result.truncated_bytes += truncated
     result.duration_ms = int((time.monotonic() - start) * 1000)
     return result
 
 
-def _collect_files(base: Path, rule: HousekeepingRule) -> list[_FileInfo]:
+def _collect_files(
+    base: Path, rule: HousekeepingRule, result: Optional[HousekeepingRuleResult] = None
+) -> list[_FileInfo]:
     results: list[_FileInfo] = []
     glob_pattern = rule.glob or "*"
     iterator = base.rglob(glob_pattern) if rule.recursive else base.glob(glob_pattern)
@@ -293,7 +307,9 @@ def _collect_files(base: Path, rule: HousekeepingRule) -> list[_FileInfo]:
             if not path.is_file():
                 continue
             stat = path.stat()
-        except OSError:
+        except OSError as e:
+            if result is not None:
+                _add_error_sample(result, "stat", path, e)
             continue
         results.append(_FileInfo(path=path, size=stat.st_size, mtime=stat.st_mtime))
     return results
@@ -310,12 +326,21 @@ def _is_absolute_path(path: str) -> bool:
     return Path(path).expanduser().is_absolute()
 
 
-def _truncate_bytes(path: Path, max_bytes: int, *, dry_run: bool) -> int:
+def _truncate_bytes(
+    path: Path,
+    max_bytes: int,
+    *,
+    dry_run: bool,
+    result: Optional[HousekeepingRuleResult] = None,
+) -> int:
     if max_bytes <= 0:
         return 0
     try:
         size = path.stat().st_size
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_bytes", path, e)
         return 0
     if size <= max_bytes:
         return 0
@@ -328,23 +353,38 @@ def _truncate_bytes(path: Path, max_bytes: int, *, dry_run: bool) -> int:
             payload = handle.read()
         _atomic_write_bytes(path, payload)
         return truncated
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_bytes", path, e)
         return 0
 
 
-def _truncate_lines(path: Path, max_lines: int, *, dry_run: bool) -> int:
+def _truncate_lines(
+    path: Path,
+    max_lines: int,
+    *,
+    dry_run: bool,
+    result: Optional[HousekeepingRuleResult] = None,
+) -> int:
     if max_lines <= 0:
         return 0
     try:
         size = path.stat().st_size
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_lines", path, e)
         return 0
     lines: deque[bytes] = deque(maxlen=max_lines)
     try:
         with path.open("rb") as handle:
             for line in handle:
                 lines.append(line)
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_lines", path, e)
         return 0
     payload = b"".join(lines)
     if len(payload) >= size:
@@ -353,7 +393,10 @@ def _truncate_lines(path: Path, max_lines: int, *, dry_run: bool) -> int:
         return size - len(payload)
     try:
         _atomic_write_bytes(path, payload)
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_lines", path, e)
         return 0
     return size - len(payload)
 
@@ -379,6 +422,17 @@ def _prune_empty_dirs(base: Path) -> None:
             continue
 
 
+def _add_error_sample(
+    result: HousekeepingRuleResult, operation: str, path: Path, exc: OSError
+) -> None:
+    if len(result.error_samples) >= _MAX_ERROR_SAMPLES:
+        return
+    exc_info = f"{type(exc).__name__}"
+    if exc.strerror:
+        exc_info += f": {exc.strerror}"
+    result.error_samples.append(f"{operation} {path}: {exc_info}")
+
+
 def _int_or_none(value: object) -> Optional[int]:
     if value is None:
         return None
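
The sampling cap means a rule that fails on thousands of files still logs at most five formatted samples per run, keeping the housekeeping.rule log event bounded. A standalone sketch of that behavior, mirroring (not importing) the helper above:

    # Standalone re-statement of _add_error_sample for illustration; the names
    # mirror the diff, but this is not the packaged code.
    _MAX_ERROR_SAMPLES = 5
    error_samples: list[str] = []

    def add_error_sample(operation: str, path: str, exc: OSError) -> None:
        if len(error_samples) >= _MAX_ERROR_SAMPLES:
            return
        exc_info = type(exc).__name__
        if exc.strerror:
            exc_info += f": {exc.strerror}"
        error_samples.append(f"{operation} {path}: {exc_info}")

    for i in range(8):
        add_error_sample("unlink", f"/var/log/app/{i}.log", PermissionError(13, "Permission denied"))
    print(len(error_samples))  # 5 -- later errors still count, but are not sampled
    print(error_samples[0])    # unlink /var/log/app/0.log: PermissionError: Permission denied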