pascal-agent 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pascal/sandbox.py ADDED
@@ -0,0 +1,287 @@
1
+ """Sandbox -- isolated command execution.
2
+
3
+ Docker (if available) → RestrictedSandbox (fallback).
4
+ Ported from Dorothy2. Zero framework dependencies.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import os
11
+ import re
12
+ import shlex
13
+ import subprocess
14
+ import tempfile
15
+ import time
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ _MAX_OUTPUT = 10_000
22
+
23
+ # Env vars to strip in restricted/delegate mode
24
+ _STRIPPED_ENV_KEYS = {
25
+ "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN",
26
+ "GOOGLE_APPLICATION_CREDENTIALS", "AZURE_CLIENT_SECRET",
27
+ "DATABASE_URL", "DB_PASSWORD", "OPENAI_API_KEY",
28
+ "ANTHROPIC_API_KEY", "GITHUB_TOKEN", "NPM_TOKEN",
29
+ "DOCKER_HOST", "KUBECONFIG",
30
+ }
31
+
32
+
33
@dataclass
class SandboxResult:
    """Outcome of a single sandboxed command execution.

    Produced by both the Docker and the restricted sandbox; failures that
    happen before or instead of a normal process exit are described in
    ``error`` and leave ``return_code`` at its default of -1.
    """

    # True when the command exited with return code 0.
    ok: bool
    # Captured standard output (already truncated by the sandbox).
    stdout: str = ""
    # Captured standard error (already truncated by the sandbox).
    stderr: str = ""
    # Process exit code; stays -1 when no exit code was obtained.
    return_code: int = -1
    # Wall-clock duration of the run in milliseconds.
    elapsed_ms: float = 0.0
    # Which sandbox produced the result ("docker" or "restricted").
    sandbox_type: str = ""
    # Human-readable failure description; empty on a normal run.
    error: str = ""

    @property
    def status(self) -> str:
        """Return "unknown" for timeouts, otherwise "ok" or "error"."""
        timed_out = bool(self.error) and "timeout" in self.error.lower()
        if timed_out:
            # A timed-out command may or may not have had an effect.
            return "unknown"
        if self.ok:
            return "ok"
        return "error"

    @property
    def output(self) -> str:
        """Return stdout followed by stderr, capped at 2000 characters."""
        combined = self.stdout + self.stderr
        return combined[:2000]
52
+
53
+
54
class DockerSandbox:
    """Run commands in Docker with --rm, --read-only, --network none, resource limits.

    The root filesystem is read-only with a 100 MB tmpfs at /tmp; the
    workspace directory (when it exists) is bind-mounted at /workspace and
    used as the working directory.
    """

    def __init__(self, image: str = "python:3.12-slim", workspace: str = ""):
        """Remember the image and workspace; availability is probed lazily."""
        self._image = image
        self._workspace = workspace
        # None = not probed yet; afterwards the cached result of `docker info`.
        self._available: bool | None = None

    def is_available(self) -> bool:
        """Return True if `docker info` succeeds; the answer is cached."""
        if self._available is None:
            try:
                probe = subprocess.run(["docker", "info"], capture_output=True, timeout=5)
                self._available = probe.returncode == 0
            except Exception:
                # Missing binary, timeout, permission problems: all mean "no Docker".
                self._available = False
        return self._available

    @staticmethod
    def _kill_container(name: str) -> None:
        """Best-effort `docker kill` of a container we started."""
        try:
            subprocess.run(["docker", "kill", name], capture_output=True, timeout=10)
        except Exception as exc:
            logger.debug("Could not kill container %s: %s", name, exc)

    def execute(self, command: str, *, timeout: int = 60, allow_network: bool = False) -> SandboxResult:
        """Run ``command`` via ``sh -c`` inside a throwaway container.

        Args:
            command: Shell command line executed inside the container.
            timeout: Seconds to wait before giving up.
            allow_network: When False (default) the container gets
                ``--network none``.

        Returns:
            A SandboxResult with ``sandbox_type="docker"``. Timeouts and
            launch failures are reported through ``error`` rather than raised.
        """
        t0 = time.time()
        # A unique name lets us kill the container if the client times out.
        container = f"pascal-sbx-{os.getpid()}-{time.time_ns()}"
        cmd = [
            "docker", "run", "--rm", "--name", container,
            "--memory", "512m", "--cpus", "1.0", "--pids-limit", "64",
            "--read-only", "--tmpfs", "/tmp:size=100m",
        ]
        if not allow_network:
            cmd += ["--network", "none"]
        if self._workspace and os.path.isdir(self._workspace):
            cmd += ["-v", f"{os.path.realpath(self._workspace)}:/workspace", "-w", "/workspace"]
        cmd += [self._image, "sh", "-c", command]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout,
                                    encoding="utf-8", errors="replace")
        except subprocess.TimeoutExpired:
            # subprocess.run only kills the docker CLI client; the container
            # itself would keep running, so stop it explicitly (--rm then
            # removes it).
            self._kill_container(container)
            return SandboxResult(ok=False, elapsed_ms=(time.time() - t0) * 1000,
                                 sandbox_type="docker", error=f"Timeout ({timeout}s)")
        except Exception as e:
            return SandboxResult(ok=False, elapsed_ms=(time.time() - t0) * 1000,
                                 sandbox_type="docker", error=f"Docker error: {e}")

        stdout, stderr = _truncate(result.stdout or ""), _truncate(result.stderr or "")
        return SandboxResult(
            ok=result.returncode == 0, stdout=stdout, stderr=stderr,
            return_code=result.returncode, elapsed_ms=(time.time() - t0) * 1000,
            sandbox_type="docker",
        )
101
+
102
+
103
+ # Patterns that indicate shell features requiring sh -c wrapping
104
+ _SHELL_FEATURES = re.compile(r"[|><;`$&]|\b&&\b|\b\|\|\b")
105
+ # Windows shell built-ins that need cmd /c
106
+ _WIN_BUILTINS = re.compile(r"^\s*(?:start|dir|copy|move|del|type|cls|set|title|assoc|ftype)\b", re.I)
107
+
108
+
109
+ def _needs_shell(command: str) -> bool:
110
+ """Return True if the command uses shell features or Windows built-ins."""
111
+ if _SHELL_FEATURES.search(command):
112
+ return True
113
+ if os.name == "nt" and _WIN_BUILTINS.search(command):
114
+ return True
115
+ return False
116
+
117
+
118
+ def _check_paths_in_command(command: str, workspace: str) -> str | None:
119
+ """Check for paths outside workspace, symlinks, and relative traversal."""
120
+ if not workspace:
121
+ return None
122
+ # Block symlink/junction/hardlink creation
123
+ if re.search(r'\bln\b\s+.*-s', command):
124
+ return "Symlink creation is not allowed in restricted sandbox"
125
+ if re.search(r'\bmklink\b', command, re.I):
126
+ return "mklink is not allowed in restricted sandbox"
127
+ if re.search(r'New-Item.*SymbolicLink', command, re.I):
128
+ return "Symlink creation is not allowed in restricted sandbox"
129
+ # Block relative traversal patterns
130
+ if re.search(r'\.\.[/\\]', command):
131
+ return "Relative path traversal (../) is not allowed in restricted sandbox"
132
+ ws = Path(workspace).resolve()
133
+ # Find absolute paths: unquoted, single-quoted, and double-quoted
134
+ # Pattern 1: unquoted paths (original)
135
+ # Pattern 2: paths inside quotes (new — catches python -c "open('/etc/passwd')")
136
+ path_patterns = [
137
+ r'(?:^|\s)(/[\w./-]+|[a-zA-Z]:\\[\w.\\ /-]+)', # unquoted
138
+ r"""['"](/[\w./ -]+|[a-zA-Z]:\\[\w.\\ /-]+)['"]""", # quoted
139
+ ]
140
+ for pattern in path_patterns:
141
+ for match in re.finditer(pattern, command):
142
+ path_str = match.group(1) if match.lastindex else match.group().strip()
143
+ try:
144
+ resolved = Path(path_str).resolve()
145
+ if not (resolved == ws or resolved.is_relative_to(ws)):
146
+ return f"Path '{path_str}' is outside workspace '{ws}'"
147
+ except (OSError, ValueError):
148
+ continue
149
+ return None
150
+
151
+
152
+ # Track temp directories created by make_safe_env for cleanup
153
+ _sandbox_temp_dirs: list[str] = []
154
+ _SANDBOX_TEMP_MAX = 50 # trigger cleanup when this many accumulate
155
+
156
+
157
+ def make_safe_env() -> tuple[dict[str, str], str]:
158
+ """Create a stripped env dict + temp HOME. Shared by sandbox and delegate.
159
+
160
+ Temp directories are tracked and cleaned up periodically via cleanup_sandbox_temps().
161
+ """
162
+ safe_env = {}
163
+ for k, v in os.environ.items():
164
+ if k.upper() in _STRIPPED_ENV_KEYS:
165
+ continue
166
+ k_up = k.upper()
167
+ if any(pat in k_up for pat in ("SECRET", "TOKEN", "PASSWORD", "CREDENTIAL", "PRIVATE_KEY")):
168
+ continue
169
+ safe_env[k] = v
170
+ tmp_home = tempfile.mkdtemp(prefix="pascal_sandbox_")
171
+ _sandbox_temp_dirs.append(tmp_home)
172
+ # Proactive cleanup: remove old temp dirs when too many accumulate
173
+ if len(_sandbox_temp_dirs) > _SANDBOX_TEMP_MAX:
174
+ cleanup_sandbox_temps(keep_last=5)
175
+ safe_env["HOME"] = tmp_home
176
+ safe_env["TMPDIR"] = tmp_home
177
+ # Windows: also override user profile and temp directories
178
+ if os.name == "nt":
179
+ safe_env["USERPROFILE"] = tmp_home
180
+ safe_env["TEMP"] = tmp_home
181
+ safe_env["TMP"] = tmp_home
182
+ safe_env["APPDATA"] = os.path.join(tmp_home, "AppData", "Roaming")
183
+ safe_env["LOCALAPPDATA"] = os.path.join(tmp_home, "AppData", "Local")
184
+ return safe_env, tmp_home
185
+
186
+
187
+ def cleanup_sandbox_temps(keep_last: int = 0) -> int:
188
+ """Remove tracked sandbox temp directories. Returns count removed."""
189
+ import shutil
190
+ removed = 0
191
+ to_remove = _sandbox_temp_dirs[:-keep_last] if keep_last else list(_sandbox_temp_dirs)
192
+ for tmp_dir in to_remove:
193
+ try:
194
+ if os.path.isdir(tmp_dir):
195
+ shutil.rmtree(tmp_dir, ignore_errors=True)
196
+ removed += 1
197
+ except Exception:
198
+ pass
199
+ if tmp_dir in _sandbox_temp_dirs:
200
+ _sandbox_temp_dirs.remove(tmp_dir)
201
+ return removed
202
+
203
+
204
class RestrictedSandbox:
    """Subprocess with stripped env vars, workspace boundary, temp HOME."""

    def __init__(self, workspace: str = ""):
        """Remember the workspace directory commands are confined to."""
        self._workspace = workspace

    def execute(self, command: str, *, timeout: int = 60) -> SandboxResult:
        """Run ``command`` as a local subprocess under the restricted rules.

        The command is rejected up front if the path check finds a
        violation. Otherwise it runs with the filtered environment from
        make_safe_env(), in the workspace directory when that exists, and
        the per-run temp HOME is deleted afterwards.

        Returns:
            A SandboxResult with ``sandbox_type="restricted"``; timeouts,
            missing executables and other launch failures are reported
            through ``error`` rather than raised.
        """
        started = time.time()

        def failure(message: str) -> SandboxResult:
            # Common shape of every post-launch error result.
            return SandboxResult(ok=False, elapsed_ms=(time.time() - started) * 1000,
                                 sandbox_type="restricted", error=message)

        workdir = None
        if self._workspace and os.path.isdir(self._workspace):
            workdir = self._workspace

        # Path boundary check: block absolute paths outside workspace
        violation = _check_paths_in_command(command, self._workspace)
        if violation:
            return SandboxResult(ok=False, elapsed_ms=0, sandbox_type="restricted",
                                 error=violation)

        safe_env, tmp_home = make_safe_env()

        # Never shell=True: either split into argv, or hand the text to an
        # explicit platform shell when shell features are needed or the
        # command cannot be tokenised.
        via_shell: list[str] = ["cmd", "/c", command] if os.name == "nt" else ["sh", "-c", command]
        argv: list[str] | str
        if _needs_shell(command):
            argv = via_shell
        else:
            try:
                argv = shlex.split(command)
            except ValueError:
                argv = via_shell

        try:
            result = subprocess.run(
                argv, shell=False, capture_output=True, text=True,
                timeout=timeout, cwd=workdir, env=safe_env,
                encoding="utf-8", errors="replace",
            )
        except subprocess.TimeoutExpired:
            return failure(f"Timeout ({timeout}s)")
        except FileNotFoundError as e:
            return failure(f"Command not found: {e}")
        except Exception as e:
            return failure(f"Execution error: {e}")
        finally:
            # Clean up this invocation's temp dir immediately
            import shutil as _shutil
            try:
                if os.path.isdir(tmp_home):
                    _shutil.rmtree(tmp_home, ignore_errors=True)
                if tmp_home in _sandbox_temp_dirs:
                    _sandbox_temp_dirs.remove(tmp_home)
            except Exception:
                pass

        out_text = _truncate(result.stdout or "")
        err_text = _truncate(result.stderr or "")
        return SandboxResult(
            ok=result.returncode == 0, stdout=out_text, stderr=err_text,
            return_code=result.returncode, elapsed_ms=(time.time() - started) * 1000,
            sandbox_type="restricted",
        )
264
+
265
+
266
class SandboxManager:
    """Auto-selects Docker if available, falls back to Restricted."""

    def __init__(self, workspace: str = "", prefer_docker: bool = True):
        """Build both sandboxes over the same workspace.

        Args:
            workspace: Directory passed to both sandbox implementations.
            prefer_docker: When False, Docker is never tried.
        """
        self._docker = DockerSandbox(workspace=workspace)
        self._restricted = RestrictedSandbox(workspace=workspace)
        self._prefer_docker = prefer_docker

    def run(self, command: str, *, timeout: int = 60, allow_network: bool = False) -> SandboxResult:
        """Execute ``command`` in the strongest sandbox that works.

        Docker is used when preferred and available. If the Docker run fails
        with an error mentioning "Docker" (i.e. the launch itself failed, not
        the command), the command is retried in the restricted sandbox.
        ``allow_network`` is only passed to the Docker sandbox.
        """
        if self._prefer_docker and self._docker.is_available():
            result = self._docker.execute(command, timeout=timeout, allow_network=allow_network)
            if result.ok or "Docker" not in result.error:
                return result
            # Falling back weakens isolation, so leave a trace instead of
            # doing it silently.
            logger.warning(
                "Docker sandbox failed (%s); falling back to restricted sandbox",
                result.error,
            )
        return self._restricted.execute(command, timeout=timeout)
281
+
282
+
283
+
284
+ def _truncate(text: str) -> str:
285
+ if len(text) <= _MAX_OUTPUT:
286
+ return text
287
+ return text[:_MAX_OUTPUT] + f"\n... [truncated, {len(text)} chars total]"
pascal/scheduler.py ADDED
@@ -0,0 +1,243 @@
1
+ """Scheduler -- bounded tick for periodic checks. Not a daemon.
2
+
3
+ Usage:
4
+ pascal --tick # manual
5
+ */5 * * * * pascal --tick # cron
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import os
13
+ import urllib.error
14
+ import urllib.request
15
+ from datetime import datetime, timedelta, timezone
16
+ from typing import Any, Callable
17
+
18
+ from pascal.state import PascalStore
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ def _cron_field_matches(field: str, value: int) -> bool:
24
+ """Match a single cron field against a value. Supports *, integers, and */N step."""
25
+ if field == "*":
26
+ return True
27
+ if field.startswith("*/"):
28
+ try:
29
+ step = int(field[2:])
30
+ return step > 0 and value % step == 0
31
+ except (ValueError, TypeError):
32
+ return False
33
+ try:
34
+ return int(field) == value
35
+ except (ValueError, TypeError):
36
+ return False
37
+
38
+
39
+ def _cron_matches(cron_expr: str, dt: datetime) -> bool:
40
+ """Simple cron matching: 'minute hour day month weekday'.
41
+ Supports *, integers, and */N step. Weekday: 0=Monday ... 6=Sunday."""
42
+ try:
43
+ parts = cron_expr.strip().split()
44
+ if len(parts) != 5:
45
+ return False
46
+ checks = [
47
+ (parts[0], dt.minute),
48
+ (parts[1], dt.hour),
49
+ (parts[2], dt.day),
50
+ (parts[3], dt.month),
51
+ (parts[4], dt.weekday()),
52
+ ]
53
+ return all(_cron_field_matches(field, value) for field, value in checks)
54
+ except (ValueError, TypeError):
55
+ return False
56
+
57
+
58
+ def _send_webhook(url: str, text: str) -> bool:
59
+ """Send a message via webhook (Slack or Discord)."""
60
+ if not url:
61
+ return False
62
+ payload = json.dumps(
63
+ {"text": text} if "slack" in url else {"content": text[:2000], "username": "Pascal"}
64
+ ).encode("utf-8")
65
+ req = urllib.request.Request(
66
+ url, data=payload,
67
+ headers={"Content-Type": "application/json"}, method="POST",
68
+ )
69
+ try:
70
+ with urllib.request.urlopen(req, timeout=10) as resp:
71
+ return resp.status in (200, 204)
72
+ except (urllib.error.URLError, OSError) as exc:
73
+ logger.warning("Webhook send failed: %s", exc)
74
+ return False
75
+
76
+
77
class Scheduler:
    """Runs one bounded maintenance pass ("tick") over the Pascal store.

    A tick reports overdue and stale tasks, expires context, fires due
    routines as notifications and re-evaluates the evolution strategy.
    Messages go to ``emit`` and to any webhook configured through the
    PASCAL_SLACK_WEBHOOK / PASCAL_DISCORD_WEBHOOK environment variables.
    """

    def __init__(self, store: PascalStore, emit: Callable[[str], None] | None = None):
        """Bind the store and output callback; read webhook URLs from env.

        Args:
            store: Backing store used for all queries and updates.
            emit: Callable receiving each message; defaults to ``print``.
        """
        self.store = store
        self._emit = emit or print
        self._slack_url = os.environ.get("PASCAL_SLACK_WEBHOOK", "")
        self._discord_url = os.environ.get("PASCAL_DISCORD_WEBHOOK", "")

    def tick(self) -> dict[str, Any]:
        """One bounded tick. Returns summary.

        The summary holds counts for overdue tasks, stale tasks, expired
        context, archived memories (always 0 for now), pending
        notifications (counted before this tick's routines fire), routines
        fired, and the evolution result.
        """
        overdue = self._check_overdue()
        stale = self._check_stale()
        expired = self.store.cleanup_expired_context()
        archived = 0  # memory decay disabled until access_count tracking is re-enabled
        pending = self._count_pending_notifications()
        evolved = self._evolve()

        for item in overdue:
            promised = ""
            if item.get("promised_to"):
                try:
                    names = json.loads(item["promised_to"])
                    if isinstance(names, str):
                        # A bare JSON string would otherwise be joined
                        # character by character.
                        names = [names]
                    promised = f"\nPromised to: {', '.join(str(n) for n in names)}"
                except (json.JSONDecodeError, TypeError):
                    pass
            msg = f"Overdue: {item['goal']}\nDue: {item['due_at']}{promised}"
            self._broadcast(msg)

        for item in stale:
            self._broadcast(f"Stale ({item['status']}): {item['goal']}")

        due_routines = self._check_routines()

        return {
            "overdue": len(overdue),
            "stale": len(stale),
            "expired_context": expired,
            "archived_memories": archived,
            "pending_notifications": pending,
            "due_routines": due_routines,
            "evolved": evolved,
        }

    def _broadcast(self, message: str) -> None:
        """Send to CLI + any configured webhooks."""
        self._emit(message)
        if self._slack_url:
            _send_webhook(self._slack_url, message)
        if self._discord_url:
            _send_webhook(self._discord_url, message)

    def _check_overdue(self) -> list[dict[str, Any]]:
        """Return unfinished tasks whose due_at is before now (UTC), oldest first."""
        now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        rows = self.store.connection.execute(
            "SELECT id, goal, status, due_at, promised_to FROM tasks "
            "WHERE due_at IS NOT NULL AND due_at < ? AND status NOT IN ('done', 'failed') "
            "ORDER BY due_at ASC", (now,),
        ).fetchall()
        return [dict(r) for r in rows]

    def _check_stale(self, days: int = 3) -> list[dict[str, Any]]:
        """Return active/paused tasks not updated within the last ``days`` days."""
        cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        rows = self.store.connection.execute(
            "SELECT id, goal, status, updated_at FROM tasks "
            "WHERE status IN ('active', 'paused') "
            "AND updated_at < ?", (cutoff,),
        ).fetchall()
        return [dict(r) for r in rows]

    def _count_pending_notifications(self) -> int:
        """Return the number of notifications with status 'pending'."""
        row = self.store.connection.execute(
            "SELECT COUNT(*) FROM notifications WHERE status = 'pending'"
        ).fetchone()
        return row[0] if row else 0

    def _check_routines(self) -> int:
        """Check context for due routines, create notifications for them.

        Each routine is a dict with ``name``, ``cron`` and ``task_goal``.
        A routine fires at most once per UTC hour: the idempotency key embeds
        the current date and hour, and an existing non-dismissed notification
        carrying that key suppresses a repeat.

        Returns:
            The number of notifications pushed.
        """
        routines = self.store.get_context("routines")
        if not routines or not isinstance(routines, list):
            return 0
        now = datetime.now(timezone.utc)
        dedup_prefix = now.strftime("%Y%m%d%H")
        fired = 0
        for r in routines:
            if not isinstance(r, dict):
                # Malformed entry in stored context; ignore instead of crashing the tick.
                continue
            name = r.get("name", "")
            cron = r.get("cron", "")
            goal = r.get("task_goal", "")
            if not cron or not goal:
                continue
            if not _cron_matches(cron, now):
                continue
            idem_key = f"routine:{name}:{dedup_prefix}"
            # Escape LIKE wildcards so "_" or "%" in a routine name only
            # matches itself.
            like_key = (
                idem_key.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
            )
            existing = self.store.connection.execute(
                "SELECT id FROM notifications WHERE metadata LIKE ? ESCAPE '\\' "
                "AND status != 'dismissed'",
                (f"%{like_key}%",),
            ).fetchone()
            if existing:
                continue
            self.store.push_notification(
                source="routine",
                message=f"[Routine: {name}] {goal}",
                priority="normal",
                metadata={"routine_name": name, "idem_key": idem_key},
            )
            fired += 1
        return fired

    # ── Self-Evolution (scorecard → strategy adjustment, zero LLM calls) ──

    _EVOLVE_WINDOW = 20  # recent actions to analyze
    _EVOLVE_MIN_ACTIONS = 5  # minimum actions before evolving

    def _evolve(self) -> dict[str, Any]:
        """Compute scorecard from recent history and adjust strategy. No LLM calls.

        Looks at the latest ``_EVOLVE_WINDOW`` history rows. With at least 10
        rows, a failure rate of 40% or more selects "careful", and a success
        rate of 90% or more with zero errors selects "fast"; otherwise
        "balanced". Strategy and scorecard are written to context only when
        the strategy differs from the stored one.

        Returns:
            ``{"strategy": ..., "scorecard": ...}``, or an empty dict when
            fewer than ``_EVOLVE_MIN_ACTIONS`` rows exist.
        """
        rows = self.store.connection.execute(
            "SELECT action_status FROM history ORDER BY created_at DESC LIMIT ?",
            (self._EVOLVE_WINDOW,),
        ).fetchall()
        total = len(rows)
        if total < self._EVOLVE_MIN_ACTIONS:
            return {}

        ok = sum(1 for r in rows if r["action_status"] == "ok")
        errors = sum(1 for r in rows if r["action_status"] == "error")
        unknown = sum(1 for r in rows if r["action_status"] == "unknown")
        success_rate = ok / total
        failure_rate = errors / total

        scorecard = {
            "total": total, "ok": ok, "errors": errors, "unknown": unknown,
            "success_rate": round(success_rate, 2),
            "failure_rate": round(failure_rate, 2),
        }

        # Strategy adjustment
        strategy = "balanced"
        if total >= 10 and failure_rate >= 0.4:
            strategy = "careful"
        elif total >= 10 and success_rate >= 0.9 and errors == 0:
            strategy = "fast"

        prev = self.store.get_context("evolution_strategy")
        if prev != strategy:
            self.store.set_context("evolution_strategy", strategy)
            self.store.set_context("evolution_scorecard", scorecard)
            self._emit(f"Evolution: strategy → {strategy} (success={success_rate:.0%}, fail={failure_rate:.0%})")

        return {"strategy": strategy, "scorecard": scorecard}

    def _archive_stale_memories(self, max_age_days: int = 90, min_access: int = 0) -> int:
        """Delete memories not accessed in max_age_days with low access count.

        Not called from tick() at present. Rows are deleted one at a time
        through ``store.delete_memory``.

        Returns:
            The number of memories deleted.
        """
        cutoff = (datetime.now(timezone.utc) - timedelta(days=max_age_days)).strftime(
            "%Y-%m-%dT%H:%M:%S.%fZ"
        )
        # Collect rows to delete so we can clean up FTS5 index too
        rows = self.store.connection.execute(
            "SELECT id FROM memories WHERE access_count <= ? "
            "AND (last_accessed IS NULL OR last_accessed < ?) "
            "AND created_at < ?",
            (min_access, cutoff, cutoff),
        ).fetchall()
        count = 0
        for row in rows:
            self.store.delete_memory(row["id"])
            count += 1
        if count > 0:
            self._emit(f"Archived {count} stale memories (>{max_age_days}d, access={min_access})")
        return count