@zigrivers/scaffold 3.13.0 → 3.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -10
- package/content/knowledge/research/research-architecture.md +385 -0
- package/content/knowledge/research/research-conventions.md +248 -0
- package/content/knowledge/research/research-dev-environment.md +303 -0
- package/content/knowledge/research/research-experiment-loop.md +429 -0
- package/content/knowledge/research/research-experiment-tracking.md +336 -0
- package/content/knowledge/research/research-ml-architecture-search.md +383 -0
- package/content/knowledge/research/research-ml-evaluation.md +407 -0
- package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
- package/content/knowledge/research/research-ml-training-patterns.md +413 -0
- package/content/knowledge/research/research-observability.md +395 -0
- package/content/knowledge/research/research-overfitting-prevention.md +306 -0
- package/content/knowledge/research/research-project-structure.md +264 -0
- package/content/knowledge/research/research-quant-backtesting.md +326 -0
- package/content/knowledge/research/research-quant-market-data.md +366 -0
- package/content/knowledge/research/research-quant-metrics.md +335 -0
- package/content/knowledge/research/research-quant-requirements.md +223 -0
- package/content/knowledge/research/research-quant-risk.md +469 -0
- package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
- package/content/knowledge/research/research-requirements.md +201 -0
- package/content/knowledge/research/research-security.md +374 -0
- package/content/knowledge/research/research-sim-compute-management.md +538 -0
- package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
- package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
- package/content/knowledge/research/research-sim-validation.md +456 -0
- package/content/knowledge/research/research-testing.md +334 -0
- package/content/methodology/research-ml-research.yml +23 -0
- package/content/methodology/research-overlay.yml +65 -0
- package/content/methodology/research-quant-finance.yml +29 -0
- package/content/methodology/research-simulation.yml +23 -0
- package/dist/cli/commands/adopt.d.ts.map +1 -1
- package/dist/cli/commands/adopt.js +30 -8
- package/dist/cli/commands/adopt.js.map +1 -1
- package/dist/cli/commands/adopt.serialization.test.js +49 -0
- package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
- package/dist/cli/commands/adopt.test.js +8 -0
- package/dist/cli/commands/adopt.test.js.map +1 -1
- package/dist/cli/commands/build.d.ts.map +1 -1
- package/dist/cli/commands/build.js +191 -180
- package/dist/cli/commands/build.js.map +1 -1
- package/dist/cli/commands/complete.d.ts.map +1 -1
- package/dist/cli/commands/complete.js +16 -12
- package/dist/cli/commands/complete.js.map +1 -1
- package/dist/cli/commands/complete.test.js +14 -5
- package/dist/cli/commands/complete.test.js.map +1 -1
- package/dist/cli/commands/init.d.ts +4 -0
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +75 -51
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/init.test.js +33 -27
- package/dist/cli/commands/init.test.js.map +1 -1
- package/dist/cli/commands/reset.d.ts.map +1 -1
- package/dist/cli/commands/reset.js +44 -40
- package/dist/cli/commands/reset.js.map +1 -1
- package/dist/cli/commands/reset.test.js +42 -20
- package/dist/cli/commands/reset.test.js.map +1 -1
- package/dist/cli/commands/rework.d.ts.map +1 -1
- package/dist/cli/commands/rework.js +16 -12
- package/dist/cli/commands/rework.js.map +1 -1
- package/dist/cli/commands/rework.test.js +12 -3
- package/dist/cli/commands/rework.test.js.map +1 -1
- package/dist/cli/commands/run.d.ts.map +1 -1
- package/dist/cli/commands/run.js +318 -298
- package/dist/cli/commands/run.js.map +1 -1
- package/dist/cli/commands/run.test.js +92 -120
- package/dist/cli/commands/run.test.js.map +1 -1
- package/dist/cli/commands/skip.d.ts.map +1 -1
- package/dist/cli/commands/skip.js +19 -15
- package/dist/cli/commands/skip.js.map +1 -1
- package/dist/cli/commands/skip.test.js +22 -11
- package/dist/cli/commands/skip.test.js.map +1 -1
- package/dist/cli/commands/update.d.ts.map +1 -1
- package/dist/cli/commands/update.js +3 -1
- package/dist/cli/commands/update.js.map +1 -1
- package/dist/cli/commands/update.test.js +8 -4
- package/dist/cli/commands/update.test.js.map +1 -1
- package/dist/cli/commands/version.d.ts.map +1 -1
- package/dist/cli/commands/version.js +3 -1
- package/dist/cli/commands/version.js.map +1 -1
- package/dist/cli/commands/version.test.js +9 -5
- package/dist/cli/commands/version.test.js.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +2 -0
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init-flag-families.d.ts +6 -1
- package/dist/cli/init-flag-families.d.ts.map +1 -1
- package/dist/cli/init-flag-families.js +32 -1
- package/dist/cli/init-flag-families.js.map +1 -1
- package/dist/cli/init-flag-families.test.js +47 -0
- package/dist/cli/init-flag-families.test.js.map +1 -1
- package/dist/cli/output/interactive.d.ts +1 -0
- package/dist/cli/output/interactive.d.ts.map +1 -1
- package/dist/cli/output/interactive.js +5 -0
- package/dist/cli/output/interactive.js.map +1 -1
- package/dist/cli/shutdown.d.ts +51 -0
- package/dist/cli/shutdown.d.ts.map +1 -0
- package/dist/cli/shutdown.js +199 -0
- package/dist/cli/shutdown.js.map +1 -0
- package/dist/cli/shutdown.test.d.ts +2 -0
- package/dist/cli/shutdown.test.d.ts.map +1 -0
- package/dist/cli/shutdown.test.js +316 -0
- package/dist/cli/shutdown.test.js.map +1 -0
- package/dist/config/schema.d.ts +272 -16
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +25 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/config/schema.test.js +103 -3
- package/dist/config/schema.test.js.map +1 -1
- package/dist/core/assembly/overlay-loader.d.ts +12 -0
- package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
- package/dist/core/assembly/overlay-loader.js +30 -0
- package/dist/core/assembly/overlay-loader.js.map +1 -1
- package/dist/core/assembly/overlay-loader.test.js +66 -1
- package/dist/core/assembly/overlay-loader.test.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.js +48 -19
- package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
- package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
- package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
- package/dist/e2e/init.test.js +5 -4
- package/dist/e2e/init.test.js.map +1 -1
- package/dist/e2e/project-type-overlays.test.js +119 -0
- package/dist/e2e/project-type-overlays.test.js.map +1 -1
- package/dist/project/adopt.d.ts.map +1 -1
- package/dist/project/adopt.js +3 -1
- package/dist/project/adopt.js.map +1 -1
- package/dist/project/detectors/disambiguate.js +1 -1
- package/dist/project/detectors/disambiguate.js.map +1 -1
- package/dist/project/detectors/index.d.ts.map +1 -1
- package/dist/project/detectors/index.js +2 -1
- package/dist/project/detectors/index.js.map +1 -1
- package/dist/project/detectors/ml.d.ts.map +1 -1
- package/dist/project/detectors/ml.js +2 -6
- package/dist/project/detectors/ml.js.map +1 -1
- package/dist/project/detectors/research.d.ts +4 -0
- package/dist/project/detectors/research.d.ts.map +1 -0
- package/dist/project/detectors/research.js +141 -0
- package/dist/project/detectors/research.js.map +1 -0
- package/dist/project/detectors/research.test.d.ts +2 -0
- package/dist/project/detectors/research.test.d.ts.map +1 -0
- package/dist/project/detectors/research.test.js +235 -0
- package/dist/project/detectors/research.test.js.map +1 -0
- package/dist/project/detectors/shared-signals.d.ts +3 -0
- package/dist/project/detectors/shared-signals.d.ts.map +1 -0
- package/dist/project/detectors/shared-signals.js +9 -0
- package/dist/project/detectors/shared-signals.js.map +1 -0
- package/dist/project/detectors/types.d.ts +6 -2
- package/dist/project/detectors/types.d.ts.map +1 -1
- package/dist/project/detectors/types.js.map +1 -1
- package/dist/state/lock-manager.d.ts +1 -0
- package/dist/state/lock-manager.d.ts.map +1 -1
- package/dist/state/lock-manager.js +1 -1
- package/dist/state/lock-manager.js.map +1 -1
- package/dist/types/config.d.ts +7 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/wizard/copy/core.d.ts.map +1 -1
- package/dist/wizard/copy/core.js +4 -0
- package/dist/wizard/copy/core.js.map +1 -1
- package/dist/wizard/copy/index.d.ts.map +1 -1
- package/dist/wizard/copy/index.js +2 -0
- package/dist/wizard/copy/index.js.map +1 -1
- package/dist/wizard/copy/research.d.ts +3 -0
- package/dist/wizard/copy/research.d.ts.map +1 -0
- package/dist/wizard/copy/research.js +27 -0
- package/dist/wizard/copy/research.js.map +1 -0
- package/dist/wizard/copy/types.d.ts +5 -1
- package/dist/wizard/copy/types.d.ts.map +1 -1
- package/dist/wizard/flags.d.ts +7 -1
- package/dist/wizard/flags.d.ts.map +1 -1
- package/dist/wizard/questions.d.ts +4 -2
- package/dist/wizard/questions.d.ts.map +1 -1
- package/dist/wizard/questions.js +27 -1
- package/dist/wizard/questions.js.map +1 -1
- package/dist/wizard/questions.test.js +51 -0
- package/dist/wizard/questions.test.js.map +1 -1
- package/dist/wizard/wizard.d.ts +3 -2
- package/dist/wizard/wizard.d.ts.map +1 -1
- package/dist/wizard/wizard.js +3 -1
- package/dist/wizard/wizard.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-security
|
|
3
|
+
description: Security for autonomous research agents including sandboxing experiment execution, resource limits, credential isolation, and code injection prevention
|
|
4
|
+
topics: [research, security, sandboxing, resource-limits, credentials, code-injection, autonomous-agents]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
Autonomous research agents run code, modify files, and access data without human oversight during each iteration. This creates a security surface that traditional software projects do not have: the agent can accidentally run destructive operations, consume unbounded resources, leak credentials through experiment outputs, or produce code that introduces injection vulnerabilities. Security must be enforced at the infrastructure level -- relying on the agent to "be careful" is not a security strategy.
|
|
8
|
+
|
|
9
|
+
## Summary
|
|
10
|
+
|
|
11
|
+
Sandbox experiment runs using OS-level isolation (containers, cgroups, filesystem restrictions) so that a runaway experiment cannot affect the host system. Enforce resource limits (CPU time, memory, disk, network) at the process level. Isolate credentials from experiment code using environment injection with read-only access. Prevent code injection by validating all agent-generated code before running it. Log all agent actions for audit and anomaly detection.
|
|
12
|
+
|
|
13
|
+
## Deep Guidance
|
|
14
|
+
|
|
15
|
+
### Sandboxing Experiment Execution
|
|
16
|
+
|
|
17
|
+
Every experiment should run in an isolated environment that limits its blast radius:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
# src/security/sandbox.py
|
|
21
|
+
import subprocess
|
|
22
|
+
import os
|
|
23
|
+
import signal
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
|
|
26
|
+
@dataclass
|
|
27
|
+
class SandboxLimits:
|
|
28
|
+
"""Resource limits for sandboxed experiment runs."""
|
|
29
|
+
max_cpu_seconds: int = 300 # 5 minutes per run
|
|
30
|
+
max_memory_mb: int = 4096 # 4 GB
|
|
31
|
+
max_disk_mb: int = 1024 # 1 GB scratch space
|
|
32
|
+
max_processes: int = 32 # Subprocess limit
|
|
33
|
+
network_enabled: bool = False # Disable network by default
|
|
34
|
+
writable_paths: list[str] | None = None # Whitelist
|
|
35
|
+
|
|
36
|
+
class ProcessSandbox:
|
|
37
|
+
"""
|
|
38
|
+
Sandboxed run using OS-level resource limits.
|
|
39
|
+
Uses ulimit on Linux/macOS for basic resource control.
|
|
40
|
+
For production use, prefer containers (Docker) or VMs.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
def __init__(self, limits: SandboxLimits):
|
|
44
|
+
self.limits = limits
|
|
45
|
+
|
|
46
|
+
def run_sandboxed(self, command: list[str], cwd: str | None = None,
|
|
47
|
+
env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
|
|
48
|
+
"""Run a command within resource limits."""
|
|
49
|
+
safe_env = self._build_safe_env(env)
|
|
50
|
+
|
|
51
|
+
def set_limits():
|
|
52
|
+
import resource
|
|
53
|
+
# CPU time limit
|
|
54
|
+
resource.setrlimit(
|
|
55
|
+
resource.RLIMIT_CPU,
|
|
56
|
+
(self.limits.max_cpu_seconds, self.limits.max_cpu_seconds),
|
|
57
|
+
)
|
|
58
|
+
# Memory limit
|
|
59
|
+
mem_bytes = self.limits.max_memory_mb * 1024 * 1024
|
|
60
|
+
resource.setrlimit(resource.RLIMIT_AS, (mem_bytes, mem_bytes))
|
|
61
|
+
# Process limit
|
|
62
|
+
resource.setrlimit(
|
|
63
|
+
resource.RLIMIT_NPROC,
|
|
64
|
+
(self.limits.max_processes, self.limits.max_processes),
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
try:
|
|
68
|
+
result = subprocess.run(
|
|
69
|
+
command,
|
|
70
|
+
cwd=cwd,
|
|
71
|
+
env=safe_env,
|
|
72
|
+
capture_output=True,
|
|
73
|
+
text=True,
|
|
74
|
+
timeout=self.limits.max_cpu_seconds + 30,
|
|
75
|
+
preexec_fn=set_limits,
|
|
76
|
+
)
|
|
77
|
+
return result
|
|
78
|
+
except subprocess.TimeoutExpired:
|
|
79
|
+
raise RuntimeError(
|
|
80
|
+
f"Experiment exceeded time limit ({self.limits.max_cpu_seconds}s)"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
def _build_safe_env(self, extra_env: dict[str, str] | None = None) -> dict[str, str]:
|
|
84
|
+
"""Build a minimal, safe environment."""
|
|
85
|
+
safe = {
|
|
86
|
+
"PATH": "/usr/local/bin:/usr/bin:/bin",
|
|
87
|
+
"HOME": os.environ.get("HOME", "/tmp"),
|
|
88
|
+
"LANG": "en_US.UTF-8",
|
|
89
|
+
}
|
|
90
|
+
venv = os.environ.get("VIRTUAL_ENV")
|
|
91
|
+
if venv:
|
|
92
|
+
safe["VIRTUAL_ENV"] = venv
|
|
93
|
+
safe["PATH"] = f"{venv}/bin:{safe['PATH']}"
|
|
94
|
+
if extra_env:
|
|
95
|
+
safe.update(extra_env)
|
|
96
|
+
return safe
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Container-Based Isolation
|
|
100
|
+
|
|
101
|
+
For stronger isolation, run experiments in containers:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
# src/security/container_sandbox.py
|
|
105
|
+
import subprocess
|
|
106
|
+
from dataclasses import dataclass
|
|
107
|
+
|
|
108
|
+
@dataclass
class ContainerConfig:
    """Settings for running an experiment in a Docker container.

    All values are strings in the form Docker's CLI flags accept, except
    ``read_only_root`` which is a switch.
    """

    # Image the experiment runs in.
    image: str = "python:3.11-slim"
    # Value passed to ``--memory``.
    memory_limit: str = "4g"
    # Value passed to ``--cpus``.
    cpu_limit: str = "2.0"
    # Value passed to ``--network``; "none" disables networking.
    network_mode: str = "none"
    # When true, the root filesystem is read-only and /tmp is a tmpfs.
    read_only_root: bool = True
    # Size of the /tmp tmpfs used with a read-only root.
    tmpfs_size: str = "1g"
|
|
116
|
+
|
|
117
|
+
def run_in_container(command: str, config: ContainerConfig,
                     volumes: dict[str, str] | None = None,
                     env: dict[str, str] | None = None) -> dict:
    """Run an experiment command inside a Docker container.

    Args:
        command: Shell command line, executed as ``bash -c <command>`` inside
            the container.
        config: Image and resource settings for the container.
        volumes: Mapping of host path to container path, each passed as a
            ``-v host:container`` bind mount.
        env: Environment variables passed with ``-e KEY=VALUE``.

    Returns:
        A dict with the keys ``stdout``, ``stderr`` and ``returncode``.

    Raises:
        subprocess.TimeoutExpired: If the container has not finished within
            600 seconds. The container is killed before the exception
            propagates.
    """
    import uuid  # local import: only needed to give the container a name

    # A known name lets us stop the container if the client times out.
    container_name = f"experiment-{uuid.uuid4().hex}"

    docker_cmd = [
        "docker", "run", "--rm",
        "--name", container_name,
        "--memory", config.memory_limit,
        "--cpus", config.cpu_limit,
        "--network", config.network_mode,
    ]

    if config.read_only_root:
        # A read-only root still needs somewhere to write scratch files.
        docker_cmd.extend(["--read-only", "--tmpfs", f"/tmp:size={config.tmpfs_size}"])

    for host_path, container_path in (volumes or {}).items():
        docker_cmd.extend(["-v", f"{host_path}:{container_path}"])

    for key, value in (env or {}).items():
        docker_cmd.extend(["-e", f"{key}={value}"])

    docker_cmd.extend([config.image, "bash", "-c", command])

    try:
        result = subprocess.run(docker_cmd, capture_output=True, text=True, timeout=600)
    except subprocess.TimeoutExpired:
        # On timeout subprocess.run kills only the docker CLI client; the
        # container itself keeps running. Stop it explicitly (``--rm`` then
        # removes it). Best effort: a failure here must not mask the timeout.
        try:
            subprocess.run(
                ["docker", "kill", container_name],
                capture_output=True,
                timeout=30,
            )
        except (subprocess.SubprocessError, OSError):
            pass
        raise

    return {
        "stdout": result.stdout,
        "stderr": result.stderr,
        "returncode": result.returncode,
    }
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Resource Limits
|
|
150
|
+
|
|
151
|
+
Enforce limits at multiple levels to prevent runaway experiments:
|
|
152
|
+
|
|
153
|
+
| Resource | Limit | Enforcement |
|
|
154
|
+
|----------|-------|-------------|
|
|
155
|
+
| CPU time | Per-run timeout | `subprocess.run(timeout=...)` + `RLIMIT_CPU` |
|
|
156
|
+
| Memory | Per-process cap | `RLIMIT_AS` or Docker `--memory` |
|
|
157
|
+
| Disk | Scratch space quota | `tmpfs` with size limit |
|
|
158
|
+
| Network | Disabled by default | Docker `--network none` or firewall |
|
|
159
|
+
| Processes | Fork bomb prevention | `RLIMIT_NPROC` |
|
|
160
|
+
| GPU memory | Per-process fraction | `CUDA_VISIBLE_DEVICES` + framework limits |
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
# GPU resource limiting
|
|
164
|
+
import os
|
|
165
|
+
|
|
166
|
+
def limit_gpu(device_ids: list[int], memory_fraction: float = 0.5) -> None:
    """Limit which GPUs, and how much GPU memory, this process may use.

    Sets ``CUDA_VISIBLE_DEVICES`` to the comma-separated ``device_ids``.
    If PyTorch can be imported and reports CUDA as available, it also calls
    ``torch.cuda.set_per_process_memory_fraction(memory_fraction)``.
    """
    visible_devices = ",".join(map(str, device_ids))
    os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices

    try:
        import torch

        cuda_ready = torch.cuda.is_available()
        if cuda_ready:
            torch.cuda.set_per_process_memory_fraction(memory_fraction)
    except ImportError:
        # PyTorch is optional; without it only the environment variable applies.
        pass
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Credential Isolation
|
|
179
|
+
|
|
180
|
+
Credentials (API keys, database passwords) must never be accessible to experiment code directly:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
# src/security/credentials.py
|
|
184
|
+
import os
|
|
185
|
+
from typing import Any
|
|
186
|
+
|
|
187
|
+
# Credentials that experiment code should NEVER have access to
|
|
188
|
+
BLOCKED_ENV_VARS = {
|
|
189
|
+
"AWS_SECRET_ACCESS_KEY",
|
|
190
|
+
"AWS_SESSION_TOKEN",
|
|
191
|
+
"DATABASE_URL",
|
|
192
|
+
"GITHUB_TOKEN",
|
|
193
|
+
"GIT_AUTHOR_EMAIL",
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
def filtered_environment(allowed_vars: set[str] | None = None) -> dict[str, str]:
|
|
197
|
+
"""
|
|
198
|
+
Return a filtered copy of environment variables.
|
|
199
|
+
Blocks all known credential variables.
|
|
200
|
+
Optionally restricts to only explicitly allowed variables.
|
|
201
|
+
"""
|
|
202
|
+
env = {}
|
|
203
|
+
for key, value in os.environ.items():
|
|
204
|
+
if key in BLOCKED_ENV_VARS:
|
|
205
|
+
continue
|
|
206
|
+
if allowed_vars is not None and key not in allowed_vars:
|
|
207
|
+
continue
|
|
208
|
+
env[key] = value
|
|
209
|
+
return env
|
|
210
|
+
|
|
211
|
+
def inject_data_credentials(env: dict[str, str],
                            data_config: dict[str, Any]) -> dict[str, str]:
    """Return a copy of ``env`` extended with data-access settings.

    ``data_config["data_source_path"]``, when present, is exposed as
    ``DATA_SOURCE_PATH``. ``data_config["api_key_env"]``, when present, names
    a variable in this process's environment; if that variable is set and
    non-empty, its value is exposed as ``DATA_API_KEY``. The input ``env`` is
    not modified.

    These are meant to be read-only credentials with minimal scope.
    """
    merged = env.copy()

    if "data_source_path" in data_config:
        merged["DATA_SOURCE_PATH"] = data_config["data_source_path"]

    if "api_key_env" not in data_config:
        return merged

    # Look the key up indirectly so the config only ever holds a variable name.
    api_key = os.environ.get(data_config["api_key_env"], "")
    if api_key:
        merged["DATA_API_KEY"] = api_key
    return merged
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### Code Injection Prevention
|
|
228
|
+
|
|
229
|
+
For code-driven experiments where the agent modifies source files, validate the generated code before running it:
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
# src/security/code_validator.py
|
|
233
|
+
import ast
|
|
234
|
+
import re
|
|
235
|
+
from pathlib import Path
|
|
236
|
+
|
|
237
|
+
# Regexes searched for in the raw source text. They match anywhere in the
# file, including comments and strings, and r"compile\(" also matches calls
# such as "re.compile(".
DANGEROUS_PATTERNS = [
    r"os\.system\(",
    r"shutil\.rmtree\(",
    r"__import__\(",
    r"compile\(",
    r"importlib\.import_module",
]


def validate_experiment_code(file_path: str) -> list[str]:
    """Validate that experiment code does not contain dangerous patterns.

    Two checks are run on the file's text:

    1. Each regex in ``DANGEROUS_PATTERNS`` is searched for; one violation is
       reported per pattern that matches, with its occurrence count.
    2. The source is parsed and every ``import`` / ``from ... import`` whose
       top-level package is ``shutil`` or ``ctypes`` is reported, including
       submodule imports such as ``ctypes.util``.

    Source that cannot be parsed is reported as a violation, not raised.

    Args:
        file_path: Path of the Python file to inspect (read as UTF-8).

    Returns:
        List of violations (empty means no violation was found).
    """
    forbidden_modules = ("shutil", "ctypes")
    content = Path(file_path).read_text(encoding="utf-8")
    violations = []

    # Regex-based pattern detection
    for pattern in DANGEROUS_PATTERNS:
        hits = len(re.findall(pattern, content))
        if hits:
            violations.append(
                f"Dangerous pattern detected: {pattern} ({hits} occurrences)"
            )

    # AST-based analysis for import validation
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError) as e:
        # ValueError covers source that ast.parse rejects before parsing,
        # e.g. null bytes on some Python versions.
        violations.append(f"Syntax error in generated code: {e}")
        return violations

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                # Compare the top-level package so "import ctypes.util"
                # cannot slip past the check.
                if alias.name.split(".")[0] in forbidden_modules:
                    violations.append(
                        f"Forbidden import: {alias.name} (line {node.lineno})"
                    )
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import name".
            module = node.module or ""
            if module.split(".")[0] in forbidden_modules:
                violations.append(
                    f"Forbidden import from: {node.module} (line {node.lineno})"
                )

    return violations
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Filesystem Access Control
|
|
283
|
+
|
|
284
|
+
Restrict which paths the experiment can read and write:
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
# src/security/filesystem.py
|
|
288
|
+
from pathlib import Path
|
|
289
|
+
|
|
290
|
+
class FilesystemPolicy:
    """Define read/write permissions for experiment runs.

    Paths are compared after ``Path.resolve()``, so ``..`` segments and
    symlinks in a queried path cannot be used to step outside an allowed
    directory. This class only answers permission questions; it does not
    itself block any filesystem access.
    """

    def __init__(self, project_root: str):
        """Build the allow-lists relative to ``project_root``."""
        self.root = Path(project_root).resolve()
        self.readable = {
            self.root / "src",
            self.root / "configs",
            self.root / "data" / "raw",
            self.root / "data" / "processed",
            self.root / "tests" / "fixtures",
        }
        self.writable = {
            self.root / "results",
            self.root / "src" / "strategies",  # Code-driven: agent writes here
            # Resolved because queried paths are resolved too: where /tmp is
            # a symlink, an unresolved entry would never match.
            Path("/tmp").resolve(),
        }

    @staticmethod
    def _is_within(path: str, allowed_dirs: set[Path]) -> bool:
        """Return True if ``path`` resolves to, or inside, an allowed directory."""
        resolved = Path(path).resolve()
        # is_relative_to is also true when the two paths are equal.
        return any(resolved.is_relative_to(allowed) for allowed in allowed_dirs)

    def can_read(self, path: str) -> bool:
        """Return True if ``path`` lies under a readable or writable directory."""
        return self._is_within(path, self.readable | self.writable)

    def can_write(self, path: str) -> bool:
        """Return True if ``path`` lies under a writable directory."""
        return self._is_within(path, self.writable)
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Audit Logging
|
|
324
|
+
|
|
325
|
+
Log all agent actions for post-hoc review and anomaly detection:
|
|
326
|
+
|
|
327
|
+
```python
|
|
328
|
+
# src/security/audit.py
|
|
329
|
+
import json
|
|
330
|
+
import logging
|
|
331
|
+
from datetime import datetime
|
|
332
|
+
from pathlib import Path
|
|
333
|
+
|
|
334
|
+
class AuditLogger:
    """Structured audit log for agent actions.

    Each action is appended to ``log_path`` as one JSON object per line and
    also emitted through the ``"audit"`` logger.
    """

    # Record fields owned by the logger; caller details may not replace them.
    _RESERVED_KEYS = ("timestamp", "action")

    def __init__(self, log_path: str):
        """Remember the log file location, creating its directory if needed."""
        self.log_path = Path(log_path)
        self.log_path.parent.mkdir(parents=True, exist_ok=True)
        self.logger = logging.getLogger("audit")

    def log_action(self, action: str, details: dict) -> None:
        """Append one audit record.

        Args:
            action: Short name of the action being recorded.
            details: JSON-serializable fields merged into the record. A key
                that clashes with ``timestamp`` or ``action`` is stored under
                a ``details_`` prefix so it cannot overwrite the record's
                own fields.
        """
        record = {
            # astimezone() attaches the local UTC offset, so the timestamp
            # is unambiguous when logs from several hosts are compared.
            "timestamp": datetime.now().astimezone().isoformat(),
            "action": action,
        }
        for key, value in details.items():
            target = f"details_{key}" if key in self._RESERVED_KEYS else key
            record[target] = value

        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(record) + "\n")
        self.logger.info("AUDIT: %s -- %s", action, json.dumps(details))

    def log_file_write(self, path: str, size: int) -> None:
        """Record that ``size`` bytes were written to ``path``."""
        self.log_action("file_write", {"path": path, "size_bytes": size})

    def log_process_run(self, command: list[str], exit_code: int) -> None:
        """Record a process run; only the first five arguments are kept."""
        self.log_action("process_run", {
            "command": command[:5],  # Truncate for safety
            "exit_code": exit_code,
        })

    def log_credential_access(self, credential_name: str) -> None:
        """Record that a credential was accessed, by name only."""
        self.log_action("credential_access", {"credential": credential_name})
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
### Security Checklist for Research Projects
|
|
366
|
+
|
|
367
|
+
1. All experiment runs use a sandbox (process limits or container).
|
|
368
|
+
2. Network access is disabled by default (enable only for API-driven experiments with explicit allowlist).
|
|
369
|
+
3. Credentials are injected via environment variables, never in config files or source code.
|
|
370
|
+
4. Agent-generated code is validated before running.
|
|
371
|
+
5. Filesystem writes are restricted to designated directories.
|
|
372
|
+
6. Resource limits are enforced at the OS level, not in application code.
|
|
373
|
+
7. All agent actions are audit-logged.
|
|
374
|
+
8. Results directory is treated as untrusted output (sanitize before display).
|