codex-autorunner 1.2.1-py3-none-any.whl → 1.3.0-py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry, and is provided for informational purposes only.
- codex_autorunner/bootstrap.py +26 -5
- codex_autorunner/core/config.py +176 -59
- codex_autorunner/core/filesystem.py +24 -0
- codex_autorunner/core/flows/controller.py +50 -12
- codex_autorunner/core/flows/runtime.py +8 -3
- codex_autorunner/core/hub.py +293 -16
- codex_autorunner/core/lifecycle_events.py +44 -5
- codex_autorunner/core/pma_delivery.py +81 -0
- codex_autorunner/core/pma_dispatches.py +224 -0
- codex_autorunner/core/pma_lane_worker.py +122 -0
- codex_autorunner/core/pma_queue.py +167 -18
- codex_autorunner/core/pma_reactive.py +91 -0
- codex_autorunner/core/pma_safety.py +58 -0
- codex_autorunner/core/pma_sink.py +104 -0
- codex_autorunner/core/pma_transcripts.py +183 -0
- codex_autorunner/core/safe_paths.py +117 -0
- codex_autorunner/housekeeping.py +77 -23
- codex_autorunner/integrations/agents/codex_backend.py +18 -12
- codex_autorunner/integrations/agents/wiring.py +2 -0
- codex_autorunner/integrations/app_server/client.py +31 -0
- codex_autorunner/integrations/app_server/supervisor.py +3 -0
- codex_autorunner/integrations/telegram/constants.py +1 -1
- codex_autorunner/integrations/telegram/handlers/commands/execution.py +16 -15
- codex_autorunner/integrations/telegram/handlers/commands/files.py +5 -8
- codex_autorunner/integrations/telegram/handlers/commands/github.py +10 -6
- codex_autorunner/integrations/telegram/handlers/commands/shared.py +9 -8
- codex_autorunner/integrations/telegram/handlers/commands/workspace.py +85 -2
- codex_autorunner/integrations/telegram/handlers/commands_runtime.py +29 -8
- codex_autorunner/integrations/telegram/helpers.py +30 -2
- codex_autorunner/integrations/telegram/ticket_flow_bridge.py +54 -3
- codex_autorunner/static/docChatCore.js +2 -0
- codex_autorunner/static/hub.js +59 -0
- codex_autorunner/static/index.html +70 -54
- codex_autorunner/static/notificationBell.js +173 -0
- codex_autorunner/static/notifications.js +154 -36
- codex_autorunner/static/pma.js +96 -35
- codex_autorunner/static/styles.css +415 -4
- codex_autorunner/static/utils.js +5 -1
- codex_autorunner/surfaces/cli/cli.py +206 -129
- codex_autorunner/surfaces/cli/template_repos.py +157 -0
- codex_autorunner/surfaces/web/app.py +193 -5
- codex_autorunner/surfaces/web/routes/file_chat.py +109 -61
- codex_autorunner/surfaces/web/routes/flows.py +125 -67
- codex_autorunner/surfaces/web/routes/pma.py +638 -57
- codex_autorunner/tickets/agent_pool.py +6 -1
- codex_autorunner/tickets/outbox.py +27 -14
- codex_autorunner/tickets/replies.py +4 -10
- codex_autorunner/tickets/runner.py +1 -0
- codex_autorunner/workspace/paths.py +8 -3
- {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/METADATA +1 -1
- {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/RECORD +55 -45
- {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/WHEEL +0 -0
- {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/entry_points.txt +0 -0
- {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/licenses/LICENSE +0 -0
- {codex_autorunner-1.2.1.dist-info → codex_autorunner-1.3.0.dist-info}/top_level.txt +0 -0
codex_autorunner/core/pma_safety.py
CHANGED
@@ -141,6 +141,45 @@ class PmaSafetyChecker:
 
         return SafetyCheckResult(allowed=True)
 
+    def check_reactive_turn(self, *, key: str = "reactive") -> SafetyCheckResult:
+        if self._is_circuit_breaker_active():
+            return SafetyCheckResult(
+                allowed=False,
+                reason="circuit_breaker_active",
+                details={
+                    "cooldown_remaining_seconds": (
+                        int(
+                            self._circuit_breaker_until
+                            - datetime.now(timezone.utc).timestamp()
+                        )
+                        if self._circuit_breaker_until
+                        else 0
+                    )
+                },
+            )
+
+        if self._config.enable_rate_limit:
+            now = datetime.now(timezone.utc).timestamp()
+            self._action_timestamps[key] = [
+                ts
+                for ts in self._action_timestamps[key]
+                if now - ts < self._config.rate_limit_window_seconds
+            ]
+            if len(self._action_timestamps[key]) >= self._config.max_actions_per_window:
+                return SafetyCheckResult(
+                    allowed=False,
+                    reason="rate_limit_exceeded",
+                    details={
+                        "key": key,
+                        "count": len(self._action_timestamps[key]),
+                        "max_allowed": self._config.max_actions_per_window,
+                        "window_seconds": self._config.rate_limit_window_seconds,
+                    },
+                )
+            self._action_timestamps[key].append(now)
+
+        return SafetyCheckResult(allowed=True)
+
     def record_chat_result(
         self,
         agent: str,
@@ -159,6 +198,25 @@ class PmaSafetyChecker:
         key = f"chat:{agent}"
         self._failure_counts[key] = 0
 
+    def record_reactive_result(
+        self,
+        *,
+        status: str,
+        error: Optional[str] = None,
+        key: str = "reactive",
+    ) -> None:
+        if (
+            status in ("error", "failed", "interrupted")
+            and self._config.enable_circuit_breaker
+        ):
+            self._failure_counts[key] += 1
+            if self._failure_counts[key] >= self._config.circuit_breaker_threshold:
+                self._activate_circuit_breaker()
+            if error:
+                logger.warning("PMA reactive circuit breaker error: %s", error)
+        else:
+            self._failure_counts[key] = 0
+
     def record_action(
         self,
         action_type: PmaActionType,
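These two hunks give `PmaSafetyChecker` a reactive-turn guard: `check_reactive_turn` applies a sliding-window rate limit behind the circuit breaker, and `record_reactive_result` feeds the breaker's failure count. Below is a minimal standalone sketch of just the sliding-window half; `WINDOW_SECONDS` and `MAX_ACTIONS` are illustrative stand-ins for the config fields `rate_limit_window_seconds` and `max_actions_per_window`, which this diff does not define.

```python
import time
from collections import defaultdict

# Illustrative stand-ins for the checker's config values (not shown in this diff).
WINDOW_SECONDS = 60.0  # rate_limit_window_seconds
MAX_ACTIONS = 5        # max_actions_per_window

_action_timestamps: defaultdict[str, list[float]] = defaultdict(list)


def allow_reactive_turn(key: str = "reactive") -> bool:
    """Sliding-window limiter: drop aged-out timestamps, then count the rest."""
    now = time.time()
    _action_timestamps[key] = [
        ts for ts in _action_timestamps[key] if now - ts < WINDOW_SECONDS
    ]
    if len(_action_timestamps[key]) >= MAX_ACTIONS:
        return False  # the real method returns reason="rate_limit_exceeded"
    _action_timestamps[key].append(now)
    return True


if __name__ == "__main__":
    print([allow_reactive_turn() for _ in range(7)])
    # -> [True, True, True, True, True, False, False]
```

Pruning expired timestamps before counting keeps the per-key list bounded by the window itself, so no background sweeper is needed.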
codex_autorunner/core/pma_sink.py
ADDED
@@ -0,0 +1,104 @@
+from __future__ import annotations
+
+import json
+import logging
+from pathlib import Path
+from typing import Any, Optional
+
+from .locks import file_lock
+from .time_utils import now_iso
+from .utils import atomic_write
+
+PMA_ACTIVE_SINK_FILENAME = "active_sink.json"
+
+logger = logging.getLogger(__name__)
+
+
+class PmaActiveSinkStore:
+    def __init__(self, hub_root: Path) -> None:
+        self._path = hub_root / ".codex-autorunner" / "pma" / PMA_ACTIVE_SINK_FILENAME
+
+    def _lock_path(self) -> Path:
+        return self._path.with_suffix(self._path.suffix + ".lock")
+
+    def load(self) -> Optional[dict[str, Any]]:
+        with file_lock(self._lock_path()):
+            return self._load_unlocked()
+
+    def set_web(self) -> dict[str, Any]:
+        payload = {
+            "version": 1,
+            "kind": "web",
+            "updated_at": now_iso(),
+            "last_delivery_turn_id": None,
+        }
+        with file_lock(self._lock_path()):
+            self._save_unlocked(payload)
+        return payload
+
+    def set_telegram(
+        self,
+        *,
+        chat_id: int,
+        thread_id: Optional[int],
+        topic_key: Optional[str] = None,
+    ) -> dict[str, Any]:
+        payload: dict[str, Any] = {
+            "version": 1,
+            "kind": "telegram",
+            "chat_id": int(chat_id),
+            "thread_id": int(thread_id) if thread_id is not None else None,
+            "updated_at": now_iso(),
+            "last_delivery_turn_id": None,
+        }
+        if topic_key:
+            payload["topic_key"] = topic_key
+        with file_lock(self._lock_path()):
+            self._save_unlocked(payload)
+        return payload
+
+    def clear(self) -> None:
+        with file_lock(self._lock_path()):
+            try:
+                self._path.unlink()
+            except FileNotFoundError:
+                return
+            except OSError as exc:
+                logger.warning("Failed to clear PMA active sink: %s", exc)
+
+    def mark_delivered(self, turn_id: str) -> bool:
+        if not isinstance(turn_id, str) or not turn_id:
+            return False
+        with file_lock(self._lock_path()):
+            payload = self._load_unlocked()
+            if not isinstance(payload, dict):
+                return False
+            if payload.get("last_delivery_turn_id") == turn_id:
+                return False
+            payload["last_delivery_turn_id"] = turn_id
+            payload["updated_at"] = now_iso()
+            self._save_unlocked(payload)
+            return True
+
+    def _load_unlocked(self) -> Optional[dict[str, Any]]:
+        if not self._path.exists():
+            return None
+        try:
+            raw = self._path.read_text(encoding="utf-8")
+        except OSError as exc:
+            logger.warning("Failed to read PMA active sink: %s", exc)
+            return None
+        try:
+            payload = json.loads(raw)
+        except json.JSONDecodeError:
+            return None
+        if not isinstance(payload, dict):
+            return None
+        return payload
+
+    def _save_unlocked(self, payload: dict[str, Any]) -> None:
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        atomic_write(self._path, json.dumps(payload, indent=2) + "\n")
+
+
+__all__ = ["PmaActiveSinkStore", "PMA_ACTIVE_SINK_FILENAME"]
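`PmaActiveSinkStore` persists a single JSON pointer that records where PMA replies should be delivered (the web UI or a Telegram chat), guarded by a lock file and written atomically. A usage sketch, assuming the module path `codex_autorunner.core.pma_sink` from the file list above and an illustrative hub root:

```python
# Requires codex-autorunner 1.3.0; the hub root and IDs below are illustrative.
from pathlib import Path

from codex_autorunner.core.pma_sink import PmaActiveSinkStore

store = PmaActiveSinkStore(Path("/tmp/hub"))

# Route PMA deliveries to a Telegram chat.
store.set_telegram(chat_id=12345, thread_id=None)
assert store.load()["kind"] == "telegram"

# mark_delivered de-duplicates per turn: True the first time, False after.
assert store.mark_delivered("turn-1") is True
assert store.mark_delivered("turn-1") is False

store.set_web()  # switch the active sink back to the web UI
store.clear()    # or remove the sink file entirely
```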
codex_autorunner/core/pma_transcripts.py
ADDED
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional
+
+from .time_utils import now_iso
+from .utils import atomic_write
+
+logger = logging.getLogger(__name__)
+
+PMA_TRANSCRIPTS_DIRNAME = "transcripts"
+PMA_TRANSCRIPT_VERSION = 1
+PMA_TRANSCRIPT_PREVIEW_CHARS = 400
+
+
+def default_pma_transcripts_dir(hub_root: Path) -> Path:
+    return hub_root / ".codex-autorunner" / "pma" / PMA_TRANSCRIPTS_DIRNAME
+
+
+def _safe_segment(value: str) -> str:
+    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "-", (value or "").strip())
+    cleaned = cleaned.strip("-._")
+    if not cleaned:
+        return "unknown"
+    return cleaned[:120]
+
+
+def _stamp_now() -> str:
+    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
+
+
+def _read_preview(path: Path) -> str:
+    if not path.exists():
+        return ""
+    try:
+        with open(path, "r", encoding="utf-8") as handle:
+            text = handle.read(PMA_TRANSCRIPT_PREVIEW_CHARS + 1)
+    except OSError as exc:
+        logger.warning("Failed to read PMA transcript content at %s: %s", path, exc)
+        return ""
+    text = text.strip()
+    if len(text) <= PMA_TRANSCRIPT_PREVIEW_CHARS:
+        return text
+    return text[:PMA_TRANSCRIPT_PREVIEW_CHARS].rstrip() + "..."
+
+
+@dataclass(frozen=True)
+class PmaTranscriptPointer:
+    turn_id: str
+    metadata_path: str
+    content_path: str
+    created_at: str
+
+
+class PmaTranscriptStore:
+    def __init__(self, hub_root: Path) -> None:
+        self._root = hub_root
+        self._dir = default_pma_transcripts_dir(hub_root)
+
+    @property
+    def dir(self) -> Path:
+        return self._dir
+
+    def write_transcript(
+        self,
+        *,
+        turn_id: str,
+        metadata: dict[str, Any],
+        assistant_text: str,
+    ) -> PmaTranscriptPointer:
+        safe_turn_id = _safe_segment(turn_id)
+        stamp = _stamp_now()
+        base = f"{stamp}_{safe_turn_id}"
+        json_path = self._dir / f"{base}.json"
+        md_path = self._dir / f"{base}.md"
+
+        payload = dict(metadata)
+        payload.setdefault("version", PMA_TRANSCRIPT_VERSION)
+        payload.setdefault("turn_id", turn_id)
+        payload.setdefault("created_at", now_iso())
+        payload["metadata_path"] = str(json_path)
+        payload["content_path"] = str(md_path)
+        payload["assistant_text_chars"] = len(assistant_text or "")
+
+        self._dir.mkdir(parents=True, exist_ok=True)
+        atomic_write(md_path, (assistant_text or "") + "\n")
+        atomic_write(json_path, json.dumps(payload, indent=2) + "\n")
+
+        return PmaTranscriptPointer(
+            turn_id=turn_id,
+            metadata_path=str(json_path),
+            content_path=str(md_path),
+            created_at=payload["created_at"],
+        )
+
+    def list_recent(self, *, limit: int = 50) -> list[dict[str, Any]]:
+        if limit <= 0:
+            return []
+        if not self._dir.exists():
+            return []
+        entries: list[dict[str, Any]] = []
+        for path in sorted(self._dir.glob("*.json"), reverse=True):
+            try:
+                raw = path.read_text(encoding="utf-8")
+                data = json.loads(raw)
+            except (OSError, json.JSONDecodeError) as exc:
+                logger.warning(
+                    "Failed to read PMA transcript metadata at %s: %s", path, exc
+                )
+                continue
+            if not isinstance(data, dict):
+                continue
+            content_path = Path(str(data.get("content_path") or ""))
+            if not content_path.is_absolute():
+                content_path = (path.parent / content_path).resolve()
+            data = dict(data)
+            data["preview"] = _read_preview(content_path)
+            entries.append(data)
+            if len(entries) >= limit:
+                break
+        return entries
+
+    def read_transcript(self, turn_id: str) -> Optional[dict[str, Any]]:
+        match = self._find_metadata(turn_id)
+        if not match:
+            return None
+        meta, meta_path = match
+        content_path = Path(str(meta.get("content_path") or ""))
+        if not content_path.is_absolute():
+            content_path = (meta_path.parent / content_path).resolve()
+        try:
+            content = content_path.read_text(encoding="utf-8")
+        except OSError as exc:
+            logger.warning(
+                "Failed to read PMA transcript content at %s: %s", content_path, exc
+            )
+            content = ""
+        return {"metadata": meta, "content": content}
+
+    def _find_metadata(self, turn_id: str) -> Optional[tuple[dict[str, Any], Path]]:
+        if not self._dir.exists():
+            return None
+        safe_turn_id = _safe_segment(turn_id)
+        candidates = sorted(self._dir.glob(f"*_{safe_turn_id}.json"), reverse=True)
+        for path in candidates:
+            meta = self._read_metadata(path)
+            if meta and str(meta.get("turn_id")) == turn_id:
+                return meta, path
+        if candidates:
+            meta = self._read_metadata(candidates[0])
+            if meta:
+                return meta, candidates[0]
+        for path in sorted(self._dir.glob("*.json"), reverse=True):
+            meta = self._read_metadata(path)
+            if meta and str(meta.get("turn_id")) == turn_id:
+                return meta, path
+        return None
+
+    def _read_metadata(self, path: Path) -> Optional[dict[str, Any]]:
+        try:
+            raw = path.read_text(encoding="utf-8")
+            data = json.loads(raw)
+        except (OSError, json.JSONDecodeError) as exc:
+            logger.warning(
+                "Failed to read PMA transcript metadata at %s: %s", path, exc
+            )
+            return None
+        return data if isinstance(data, dict) else None
+
+
+__all__ = [
+    "PMA_TRANSCRIPTS_DIRNAME",
+    "PMA_TRANSCRIPT_PREVIEW_CHARS",
+    "PMA_TRANSCRIPT_VERSION",
+    "PmaTranscriptPointer",
+    "PmaTranscriptStore",
+    "default_pma_transcripts_dir",
+]
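Each turn is stored as a metadata JSON plus a markdown body under `.codex-autorunner/pma/transcripts/`, named `<UTC stamp>_<sanitized turn id>`. A usage sketch, assuming the module path `codex_autorunner.core.pma_transcripts` from the file list and an illustrative hub root:

```python
# Requires codex-autorunner 1.3.0; the hub root below is illustrative.
from pathlib import Path

from codex_autorunner.core.pma_transcripts import PmaTranscriptStore

store = PmaTranscriptStore(Path("/tmp/hub"))

pointer = store.write_transcript(
    turn_id="turn-1",
    metadata={"agent": "codex"},  # extra metadata keys are preserved
    assistant_text="Hello from the agent.",
)
print(pointer.content_path)       # .../pma/transcripts/<UTC stamp>_turn-1.md

for entry in store.list_recent(limit=10):
    print(entry["turn_id"], entry["preview"])  # preview is capped at 400 chars

full = store.read_transcript("turn-1")
if full:
    print(full["content"])        # the whole markdown body
```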
codex_autorunner/core/safe_paths.py
ADDED
@@ -0,0 +1,117 @@
+"""Safe path validation utilities for web endpoints.
+
+This module provides utilities for validating user-controlled paths to prevent
+directory traversal attacks and other path-based security issues.
+"""
+
+from pathlib import PurePosixPath
+from typing import Optional
+
+
+class SafePathError(Exception):
+    """Raised when a path fails safety validation."""
+
+    def __init__(self, message: str, path: Optional[str] = None) -> None:
+        super().__init__(message)
+        self.path = path
+
+
+def validate_relative_posix_path(raw: str) -> PurePosixPath:
+    """Validate a user-provided path string and return a PurePosixPath.
+
+    This function validates that:
+    1. The path is not absolute
+    2. The path does not contain '..' segments (parent directory traversal)
+    3. The path does not contain backslashes (Windows separators)
+    4. The path is not empty, '.', or only slashes
+
+    Args:
+        raw: The user-provided path string (typically from a URL path parameter)
+
+    Returns:
+        A validated PurePosixPath object
+
+    Raises:
+        SafePathError: If the path fails validation
+
+    Examples:
+        >>> validate_relative_posix_path("file.txt")
+        PurePosixPath('file.txt')
+
+        >>> validate_relative_posix_path("a/b/c.txt")
+        PurePosixPath('a/b/c.txt')
+
+        >>> validate_relative_posix_path("../etc/passwd")
+        SafePathError: Invalid path: '..' not allowed
+
+        >>> validate_relative_posix_path("/etc/passwd")
+        SafePathError: Absolute paths not allowed
+    """
+    if not raw or raw.strip() == "" or raw == ".":
+        raise SafePathError("Invalid path: empty or '.'", path=raw)
+
+    # Reject backslashes early (Windows separators)
+    if "\\" in raw:
+        raise SafePathError("Invalid path: backslashes not allowed", path=raw)
+
+    # Reject '..' in the raw path before PurePosixPath normalizes it
+    # We need to check the raw string because PurePosixPath("a/../b")
+    # normalizes to "b", which would bypass the later parts check
+    if ".." in raw:
+        raise SafePathError("Invalid path: '..' not allowed", path=raw)
+
+    # Parse with PurePosixPath to ensure POSIX semantics
+    try:
+        file_rel = PurePosixPath(raw)
+    except Exception as exc:
+        raise SafePathError(f"Invalid path: {exc}", path=raw) from exc
+
+    # Reject absolute paths
+    if file_rel.is_absolute():
+        raise SafePathError("Absolute paths not allowed", path=raw)
+
+    # Double-check '..' traversal segments after parsing (for edge cases)
+    if ".." in file_rel.parts:
+        raise SafePathError("Invalid path: '..' not allowed", path=raw)
+
+    return file_rel
+
+
+def validate_single_filename(raw: str) -> str:
+    """Validate that a path string represents only a single filename (no subpaths).
+
+    This is a stricter version of validate_relative_posix_path that only allows
+    a single filename component, not subdirectories.
+
+    Args:
+        raw: The user-provided path string
+
+    Returns:
+        The validated filename
+
+    Raises:
+        SafePathError: If the path contains slashes or is otherwise invalid
+
+    Examples:
+        >>> validate_single_filename("file.txt")
+        'file.txt'
+
+        >>> validate_single_filename("a/b.txt")
+        SafePathError: Subpaths not allowed: only single filenames permitted
+
+        >>> validate_single_filename("../etc/passwd")
+        SafePathError: Subpaths not allowed: only single filenames permitted
+    """
+    file_rel = validate_relative_posix_path(raw)
+
+    # Ensure only a single component (no subpaths)
+    if len(file_rel.parts) != 1:
+        raise SafePathError(
+            "Subpaths not allowed: only single filenames permitted", path=raw
+        )
+
+    # Return the string representation of the filename
+    return str(file_rel)
+
+
+__all__ = ["SafePathError", "validate_relative_posix_path", "validate_single_filename"]
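Both validators raise `SafePathError` instead of returning a sentinel, so a route handler can translate one exception type into a 400 response. A brief usage sketch, assuming the module path `codex_autorunner.core.safe_paths` from the file list:

```python
# Requires codex-autorunner 1.3.0.
from codex_autorunner.core.safe_paths import (
    SafePathError,
    validate_relative_posix_path,
    validate_single_filename,
)

print(validate_relative_posix_path("notes/todo.md"))  # PurePosixPath('notes/todo.md')
print(validate_single_filename("report.pdf"))         # 'report.pdf'

# Traversal, absolute paths, backslashes, and subpaths are all rejected.
for bad in ("../etc/passwd", "/etc/passwd", "a\\b.txt", "a/b.txt"):
    try:
        validate_single_filename(bad)
    except SafePathError as exc:
        print(f"rejected {bad!r}: {exc}")
```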
codex_autorunner/housekeeping.py
CHANGED
@@ -8,6 +8,8 @@ from collections import deque
 from pathlib import Path
 from typing import Any, Iterable, Optional, Protocol, cast
 
+_MAX_ERROR_SAMPLES = 5
+
 
 @dataclasses.dataclass(frozen=True)
 class HousekeepingRule:
@@ -42,6 +44,7 @@ class HousekeepingRuleResult:
     deleted_bytes: int = 0
     truncated_bytes: int = 0
     errors: int = 0
+    error_samples: list[str] = dataclasses.field(default_factory=list)
     duration_ms: int = 0
 
 
@@ -136,21 +139,26 @@ def run_housekeeping_once(
             continue
         results.append(result)
         if logger is not None:
+            log_fields: dict[str, Any] = {
+                "name": result.name,
+                "kind": result.kind,
+                "scanned_count": result.scanned_count,
+                "eligible_count": result.eligible_count,
+                "deleted_count": result.deleted_count,
+                "deleted_bytes": result.deleted_bytes,
+                "truncated_bytes": result.truncated_bytes,
+                "errors": result.errors,
+                "duration_ms": result.duration_ms,
+                "dry_run": config.dry_run,
+                "root": str(root),
+            }
+            if result.errors > 0 and result.error_samples:
+                log_fields["error_samples"] = result.error_samples
             _log_event(
                 logger,
                 logging.INFO,
                 "housekeeping.rule",
-                name=result.name,
-                kind=result.kind,
-                scanned_count=result.scanned_count,
-                eligible_count=result.eligible_count,
-                deleted_count=result.deleted_count,
-                deleted_bytes=result.deleted_bytes,
-                truncated_bytes=result.truncated_bytes,
-                errors=result.errors,
-                duration_ms=result.duration_ms,
-                dry_run=config.dry_run,
-                root=str(root),
+                **log_fields,
             )
     if logger is not None:
         _log_event(
@@ -174,7 +182,7 @@ def _apply_directory_rule(
         return result
     now = time.time()
    min_age = max(config.min_file_age_seconds, 0)
-    files = _collect_files(base, rule)
+    files = _collect_files(base, rule, result)
     result.scanned_count = len(files)
     if not files:
         result.duration_ms = int((time.monotonic() - start) * 1000)
@@ -192,8 +200,9 @@ def _apply_directory_rule(
         if not config.dry_run:
             try:
                 entry.path.unlink()
-            except OSError:
+            except OSError as e:
                 errors += 1
+                _add_error_sample(result, "unlink", entry.path, e)
                 return
         deleted.add(entry.path)
         deleted_bytes += entry.size
@@ -254,8 +263,9 @@ def _apply_file_rule(
         return result
     try:
         stat = path.stat()
-    except OSError:
+    except OSError as e:
         result.errors = 1
+        _add_error_sample(result, "stat", path, e)
         return result
     if not path.is_file():
         return result
@@ -271,6 +281,7 @@ def _apply_file_rule(
             path,
             rule.max_lines,
             dry_run=config.dry_run,
+            result=result,
         )
         result.truncated_bytes += truncated
     if rule.max_bytes is not None:
@@ -278,13 +289,16 @@ def _apply_file_rule(
             path,
             rule.max_bytes,
             dry_run=config.dry_run,
+            result=result,
         )
         result.truncated_bytes += truncated
     result.duration_ms = int((time.monotonic() - start) * 1000)
     return result
 
 
-def _collect_files(base: Path, rule: HousekeepingRule) -> list[_FileInfo]:
+def _collect_files(
+    base: Path, rule: HousekeepingRule, result: Optional[HousekeepingRuleResult] = None
+) -> list[_FileInfo]:
     results: list[_FileInfo] = []
     glob_pattern = rule.glob or "*"
     iterator = base.rglob(glob_pattern) if rule.recursive else base.glob(glob_pattern)
@@ -293,7 +307,9 @@ def _collect_files(base: Path, rule: HousekeepingRule) -> list[_FileInfo]:
             if not path.is_file():
                 continue
             stat = path.stat()
-        except OSError:
+        except OSError as e:
+            if result is not None:
+                _add_error_sample(result, "stat", path, e)
             continue
         results.append(_FileInfo(path=path, size=stat.st_size, mtime=stat.st_mtime))
     return results
@@ -310,12 +326,21 @@ def _is_absolute_path(path: str) -> bool:
     return Path(path).expanduser().is_absolute()
 
 
-def _truncate_bytes(path: Path, max_bytes: int, *, dry_run: bool) -> int:
+def _truncate_bytes(
+    path: Path,
+    max_bytes: int,
+    *,
+    dry_run: bool,
+    result: Optional[HousekeepingRuleResult] = None,
+) -> int:
     if max_bytes <= 0:
         return 0
     try:
         size = path.stat().st_size
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_bytes", path, e)
         return 0
     if size <= max_bytes:
         return 0
@@ -328,23 +353,38 @@ def _truncate_bytes(path: Path, max_bytes: int, *, dry_run: bool) -> int:
             payload = handle.read()
         _atomic_write_bytes(path, payload)
         return truncated
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_bytes", path, e)
         return 0
 
 
-def _truncate_lines(path: Path, max_lines: int, *, dry_run: bool) -> int:
+def _truncate_lines(
+    path: Path,
+    max_lines: int,
+    *,
+    dry_run: bool,
+    result: Optional[HousekeepingRuleResult] = None,
+) -> int:
     if max_lines <= 0:
         return 0
     try:
         size = path.stat().st_size
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_lines", path, e)
         return 0
     lines: deque[bytes] = deque(maxlen=max_lines)
     try:
         with path.open("rb") as handle:
             for line in handle:
                 lines.append(line)
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_lines", path, e)
         return 0
     payload = b"".join(lines)
     if len(payload) >= size:
@@ -353,7 +393,10 @@ def _truncate_lines(path: Path, max_lines: int, *, dry_run: bool) -> int:
         return size - len(payload)
     try:
         _atomic_write_bytes(path, payload)
-    except OSError:
+    except OSError as e:
+        if result is not None:
+            result.errors += 1
+            _add_error_sample(result, "truncate_lines", path, e)
         return 0
     return size - len(payload)
 
@@ -379,6 +422,17 @@ def _prune_empty_dirs(base: Path) -> None:
             continue
 
 
+def _add_error_sample(
+    result: HousekeepingRuleResult, operation: str, path: Path, exc: OSError
+) -> None:
+    if len(result.error_samples) >= _MAX_ERROR_SAMPLES:
+        return
+    exc_info = f"{type(exc).__name__}"
+    if exc.strerror:
+        exc_info += f": {exc.strerror}"
+    result.error_samples.append(f"{operation} {path}: {exc_info}")
+
+
 def _int_or_none(value: object) -> Optional[int]:
     if value is None:
         return None