@zigrivers/scaffold 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/README.md +32 -10
  2. package/content/knowledge/research/research-architecture.md +385 -0
  3. package/content/knowledge/research/research-conventions.md +248 -0
  4. package/content/knowledge/research/research-dev-environment.md +303 -0
  5. package/content/knowledge/research/research-experiment-loop.md +429 -0
  6. package/content/knowledge/research/research-experiment-tracking.md +336 -0
  7. package/content/knowledge/research/research-ml-architecture-search.md +383 -0
  8. package/content/knowledge/research/research-ml-evaluation.md +407 -0
  9. package/content/knowledge/research/research-ml-experiment-tracking.md +466 -0
  10. package/content/knowledge/research/research-ml-training-patterns.md +413 -0
  11. package/content/knowledge/research/research-observability.md +395 -0
  12. package/content/knowledge/research/research-overfitting-prevention.md +306 -0
  13. package/content/knowledge/research/research-project-structure.md +264 -0
  14. package/content/knowledge/research/research-quant-backtesting.md +326 -0
  15. package/content/knowledge/research/research-quant-market-data.md +366 -0
  16. package/content/knowledge/research/research-quant-metrics.md +335 -0
  17. package/content/knowledge/research/research-quant-requirements.md +223 -0
  18. package/content/knowledge/research/research-quant-risk.md +469 -0
  19. package/content/knowledge/research/research-quant-strategy-patterns.md +412 -0
  20. package/content/knowledge/research/research-requirements.md +201 -0
  21. package/content/knowledge/research/research-security.md +374 -0
  22. package/content/knowledge/research/research-sim-compute-management.md +538 -0
  23. package/content/knowledge/research/research-sim-engine-patterns.md +448 -0
  24. package/content/knowledge/research/research-sim-parameter-spaces.md +425 -0
  25. package/content/knowledge/research/research-sim-validation.md +456 -0
  26. package/content/knowledge/research/research-testing.md +334 -0
  27. package/content/methodology/research-ml-research.yml +23 -0
  28. package/content/methodology/research-overlay.yml +65 -0
  29. package/content/methodology/research-quant-finance.yml +29 -0
  30. package/content/methodology/research-simulation.yml +23 -0
  31. package/dist/cli/commands/adopt.d.ts.map +1 -1
  32. package/dist/cli/commands/adopt.js +30 -8
  33. package/dist/cli/commands/adopt.js.map +1 -1
  34. package/dist/cli/commands/adopt.serialization.test.js +49 -0
  35. package/dist/cli/commands/adopt.serialization.test.js.map +1 -1
  36. package/dist/cli/commands/adopt.test.js +8 -0
  37. package/dist/cli/commands/adopt.test.js.map +1 -1
  38. package/dist/cli/commands/build.d.ts.map +1 -1
  39. package/dist/cli/commands/build.js +191 -180
  40. package/dist/cli/commands/build.js.map +1 -1
  41. package/dist/cli/commands/complete.d.ts.map +1 -1
  42. package/dist/cli/commands/complete.js +16 -12
  43. package/dist/cli/commands/complete.js.map +1 -1
  44. package/dist/cli/commands/complete.test.js +14 -5
  45. package/dist/cli/commands/complete.test.js.map +1 -1
  46. package/dist/cli/commands/init.d.ts +4 -0
  47. package/dist/cli/commands/init.d.ts.map +1 -1
  48. package/dist/cli/commands/init.js +75 -51
  49. package/dist/cli/commands/init.js.map +1 -1
  50. package/dist/cli/commands/init.test.js +33 -27
  51. package/dist/cli/commands/init.test.js.map +1 -1
  52. package/dist/cli/commands/reset.d.ts.map +1 -1
  53. package/dist/cli/commands/reset.js +44 -40
  54. package/dist/cli/commands/reset.js.map +1 -1
  55. package/dist/cli/commands/reset.test.js +42 -20
  56. package/dist/cli/commands/reset.test.js.map +1 -1
  57. package/dist/cli/commands/rework.d.ts.map +1 -1
  58. package/dist/cli/commands/rework.js +16 -12
  59. package/dist/cli/commands/rework.js.map +1 -1
  60. package/dist/cli/commands/rework.test.js +12 -3
  61. package/dist/cli/commands/rework.test.js.map +1 -1
  62. package/dist/cli/commands/run.d.ts.map +1 -1
  63. package/dist/cli/commands/run.js +318 -298
  64. package/dist/cli/commands/run.js.map +1 -1
  65. package/dist/cli/commands/run.test.js +92 -120
  66. package/dist/cli/commands/run.test.js.map +1 -1
  67. package/dist/cli/commands/skip.d.ts.map +1 -1
  68. package/dist/cli/commands/skip.js +19 -15
  69. package/dist/cli/commands/skip.js.map +1 -1
  70. package/dist/cli/commands/skip.test.js +22 -11
  71. package/dist/cli/commands/skip.test.js.map +1 -1
  72. package/dist/cli/commands/update.d.ts.map +1 -1
  73. package/dist/cli/commands/update.js +3 -1
  74. package/dist/cli/commands/update.js.map +1 -1
  75. package/dist/cli/commands/update.test.js +8 -4
  76. package/dist/cli/commands/update.test.js.map +1 -1
  77. package/dist/cli/commands/version.d.ts.map +1 -1
  78. package/dist/cli/commands/version.js +3 -1
  79. package/dist/cli/commands/version.js.map +1 -1
  80. package/dist/cli/commands/version.test.js +9 -5
  81. package/dist/cli/commands/version.test.js.map +1 -1
  82. package/dist/cli/index.d.ts.map +1 -1
  83. package/dist/cli/index.js +2 -0
  84. package/dist/cli/index.js.map +1 -1
  85. package/dist/cli/init-flag-families.d.ts +6 -1
  86. package/dist/cli/init-flag-families.d.ts.map +1 -1
  87. package/dist/cli/init-flag-families.js +32 -1
  88. package/dist/cli/init-flag-families.js.map +1 -1
  89. package/dist/cli/init-flag-families.test.js +47 -0
  90. package/dist/cli/init-flag-families.test.js.map +1 -1
  91. package/dist/cli/output/interactive.d.ts +1 -0
  92. package/dist/cli/output/interactive.d.ts.map +1 -1
  93. package/dist/cli/output/interactive.js +5 -0
  94. package/dist/cli/output/interactive.js.map +1 -1
  95. package/dist/cli/shutdown.d.ts +51 -0
  96. package/dist/cli/shutdown.d.ts.map +1 -0
  97. package/dist/cli/shutdown.js +199 -0
  98. package/dist/cli/shutdown.js.map +1 -0
  99. package/dist/cli/shutdown.test.d.ts +2 -0
  100. package/dist/cli/shutdown.test.d.ts.map +1 -0
  101. package/dist/cli/shutdown.test.js +316 -0
  102. package/dist/cli/shutdown.test.js.map +1 -0
  103. package/dist/config/schema.d.ts +272 -16
  104. package/dist/config/schema.d.ts.map +1 -1
  105. package/dist/config/schema.js +25 -1
  106. package/dist/config/schema.js.map +1 -1
  107. package/dist/config/schema.test.js +103 -3
  108. package/dist/config/schema.test.js.map +1 -1
  109. package/dist/core/assembly/overlay-loader.d.ts +12 -0
  110. package/dist/core/assembly/overlay-loader.d.ts.map +1 -1
  111. package/dist/core/assembly/overlay-loader.js +30 -0
  112. package/dist/core/assembly/overlay-loader.js.map +1 -1
  113. package/dist/core/assembly/overlay-loader.test.js +66 -1
  114. package/dist/core/assembly/overlay-loader.test.js.map +1 -1
  115. package/dist/core/assembly/overlay-state-resolver.d.ts.map +1 -1
  116. package/dist/core/assembly/overlay-state-resolver.js +48 -19
  117. package/dist/core/assembly/overlay-state-resolver.js.map +1 -1
  118. package/dist/core/assembly/overlay-state-resolver.test.js +80 -0
  119. package/dist/core/assembly/overlay-state-resolver.test.js.map +1 -1
  120. package/dist/e2e/init.test.js +5 -4
  121. package/dist/e2e/init.test.js.map +1 -1
  122. package/dist/e2e/project-type-overlays.test.js +119 -0
  123. package/dist/e2e/project-type-overlays.test.js.map +1 -1
  124. package/dist/project/adopt.d.ts.map +1 -1
  125. package/dist/project/adopt.js +3 -1
  126. package/dist/project/adopt.js.map +1 -1
  127. package/dist/project/detectors/disambiguate.js +1 -1
  128. package/dist/project/detectors/disambiguate.js.map +1 -1
  129. package/dist/project/detectors/index.d.ts.map +1 -1
  130. package/dist/project/detectors/index.js +2 -1
  131. package/dist/project/detectors/index.js.map +1 -1
  132. package/dist/project/detectors/ml.d.ts.map +1 -1
  133. package/dist/project/detectors/ml.js +2 -6
  134. package/dist/project/detectors/ml.js.map +1 -1
  135. package/dist/project/detectors/research.d.ts +4 -0
  136. package/dist/project/detectors/research.d.ts.map +1 -0
  137. package/dist/project/detectors/research.js +141 -0
  138. package/dist/project/detectors/research.js.map +1 -0
  139. package/dist/project/detectors/research.test.d.ts +2 -0
  140. package/dist/project/detectors/research.test.d.ts.map +1 -0
  141. package/dist/project/detectors/research.test.js +235 -0
  142. package/dist/project/detectors/research.test.js.map +1 -0
  143. package/dist/project/detectors/shared-signals.d.ts +3 -0
  144. package/dist/project/detectors/shared-signals.d.ts.map +1 -0
  145. package/dist/project/detectors/shared-signals.js +9 -0
  146. package/dist/project/detectors/shared-signals.js.map +1 -0
  147. package/dist/project/detectors/types.d.ts +6 -2
  148. package/dist/project/detectors/types.d.ts.map +1 -1
  149. package/dist/project/detectors/types.js.map +1 -1
  150. package/dist/state/lock-manager.d.ts +1 -0
  151. package/dist/state/lock-manager.d.ts.map +1 -1
  152. package/dist/state/lock-manager.js +1 -1
  153. package/dist/state/lock-manager.js.map +1 -1
  154. package/dist/types/config.d.ts +7 -1
  155. package/dist/types/config.d.ts.map +1 -1
  156. package/dist/wizard/copy/core.d.ts.map +1 -1
  157. package/dist/wizard/copy/core.js +4 -0
  158. package/dist/wizard/copy/core.js.map +1 -1
  159. package/dist/wizard/copy/index.d.ts.map +1 -1
  160. package/dist/wizard/copy/index.js +2 -0
  161. package/dist/wizard/copy/index.js.map +1 -1
  162. package/dist/wizard/copy/research.d.ts +3 -0
  163. package/dist/wizard/copy/research.d.ts.map +1 -0
  164. package/dist/wizard/copy/research.js +27 -0
  165. package/dist/wizard/copy/research.js.map +1 -0
  166. package/dist/wizard/copy/types.d.ts +5 -1
  167. package/dist/wizard/copy/types.d.ts.map +1 -1
  168. package/dist/wizard/flags.d.ts +7 -1
  169. package/dist/wizard/flags.d.ts.map +1 -1
  170. package/dist/wizard/questions.d.ts +4 -2
  171. package/dist/wizard/questions.d.ts.map +1 -1
  172. package/dist/wizard/questions.js +27 -1
  173. package/dist/wizard/questions.js.map +1 -1
  174. package/dist/wizard/questions.test.js +51 -0
  175. package/dist/wizard/questions.test.js.map +1 -1
  176. package/dist/wizard/wizard.d.ts +3 -2
  177. package/dist/wizard/wizard.d.ts.map +1 -1
  178. package/dist/wizard/wizard.js +3 -1
  179. package/dist/wizard/wizard.js.map +1 -1
  180. package/package.json +1 -1
@@ -0,0 +1,374 @@
1
+ ---
2
+ name: research-security
3
+ description: Security for autonomous research agents including sandboxing experiment execution, resource limits, credential isolation, and code injection prevention
4
+ topics: [research, security, sandboxing, resource-limits, credentials, code-injection, autonomous-agents]
5
+ ---
6
+
7
+ Autonomous research agents run code, modify files, and access data without human oversight during each iteration. This creates an attack surface that traditional software projects do not have: the agent can accidentally run destructive operations, consume unbounded resources, leak credentials through experiment outputs, or produce code that introduces injection vulnerabilities. Security must be enforced at the infrastructure level -- relying on the agent to "be careful" is not a security strategy.
8
+
9
+ ## Summary
10
+
11
+ Sandbox experiment runs using OS-level isolation (containers, cgroups, filesystem restrictions) so that a runaway experiment cannot affect the host system. Enforce resource limits (CPU time, memory, disk, network) at the process level. Isolate credentials from experiment code: strip secrets from the experiment's environment and inject only narrowly scoped, read-only credentials. Prevent code injection by validating all agent-generated code before running it. Log all agent actions for audit and anomaly detection.
12
+
13
+ ## Deep Guidance
14
+
15
+ ### Sandboxing Experiment Execution
16
+
17
+ Every experiment should execute in an isolated environment that limits its blast radius:
18
+
19
+ ```python
20
+ # src/security/sandbox.py
21
+ import subprocess
22
+ import os
23
+ import signal
24
+ from dataclasses import dataclass
25
+
26
+ @dataclass
27
+ class SandboxLimits:
28
+ """Resource limits for sandboxed experiment runs."""
29
+ max_cpu_seconds: int = 300 # 5 minutes per run
30
+ max_memory_mb: int = 4096 # 4 GB
31
+ max_disk_mb: int = 1024 # 1 GB scratch space
32
+ max_processes: int = 32 # Subprocess limit
33
+ network_enabled: bool = False # Disable network by default
34
+ writable_paths: list[str] | None = None # Whitelist
35
+
36
+ class ProcessSandbox:
37
+ """
38
+ Sandboxed run using OS-level resource limits.
39
+ Uses ulimit on Linux/macOS for basic resource control.
40
+ For production use, prefer containers (Docker) or VMs.
41
+ """
42
+
43
+ def __init__(self, limits: SandboxLimits):
44
+ self.limits = limits
45
+
46
+ def run_sandboxed(self, command: list[str], cwd: str | None = None,
47
+ env: dict[str, str] | None = None) -> subprocess.CompletedProcess:
48
+ """Run a command within resource limits."""
49
+ safe_env = self._build_safe_env(env)
50
+
51
+ def set_limits():
52
+ import resource
53
+ # CPU time limit
54
+ resource.setrlimit(
55
+ resource.RLIMIT_CPU,
56
+ (self.limits.max_cpu_seconds, self.limits.max_cpu_seconds),
57
+ )
58
+ # Memory limit
59
+ mem_bytes = self.limits.max_memory_mb * 1024 * 1024
60
+ resource.setrlimit(resource.RLIMIT_AS, (mem_bytes, mem_bytes))
61
+ # Process limit
62
+ resource.setrlimit(
63
+ resource.RLIMIT_NPROC,
64
+ (self.limits.max_processes, self.limits.max_processes),
65
+ )
66
+
67
+ try:
68
+ result = subprocess.run(
69
+ command,
70
+ cwd=cwd,
71
+ env=safe_env,
72
+ capture_output=True,
73
+ text=True,
74
+ timeout=self.limits.max_cpu_seconds + 30,
75
+ preexec_fn=set_limits,
76
+ )
77
+ return result
78
+ except subprocess.TimeoutExpired:
79
+ raise RuntimeError(
80
+ f"Experiment exceeded time limit ({self.limits.max_cpu_seconds}s)"
81
+ )
82
+
83
+ def _build_safe_env(self, extra_env: dict[str, str] | None = None) -> dict[str, str]:
84
+ """Build a minimal, safe environment."""
85
+ safe = {
86
+ "PATH": "/usr/local/bin:/usr/bin:/bin",
87
+ "HOME": os.environ.get("HOME", "/tmp"),
88
+ "LANG": "en_US.UTF-8",
89
+ }
90
+ venv = os.environ.get("VIRTUAL_ENV")
91
+ if venv:
92
+ safe["VIRTUAL_ENV"] = venv
93
+ safe["PATH"] = f"{venv}/bin:{safe['PATH']}"
94
+ if extra_env:
95
+ safe.update(extra_env)
96
+ return safe
97
+ ```
98
+
99
+ ### Container-Based Isolation
100
+
101
+ For stronger isolation, run experiments in containers:
102
+
103
+ ```python
104
+ # src/security/container_sandbox.py
105
+ import subprocess
106
+ from dataclasses import dataclass
107
+
108
+ @dataclass
109
+ class ContainerConfig:
110
+ image: str = "python:3.11-slim"
111
+ memory_limit: str = "4g"
112
+ cpu_limit: str = "2.0"
113
+ network_mode: str = "none"
114
+ read_only_root: bool = True
115
+ tmpfs_size: str = "1g"
116
+
117
+ def run_in_container(command: str, config: ContainerConfig,
118
+ volumes: dict[str, str] | None = None,
119
+ env: dict[str, str] | None = None) -> dict:
120
+ """Run an experiment command inside a Docker container."""
121
+ docker_cmd = [
122
+ "docker", "run", "--rm",
123
+ "--memory", config.memory_limit,
124
+ "--cpus", config.cpu_limit,
125
+ "--network", config.network_mode,
126
+ ]
127
+
128
+ if config.read_only_root:
129
+ docker_cmd.extend(["--read-only", "--tmpfs", f"/tmp:size={config.tmpfs_size}"])
130
+
131
+ if volumes:
132
+ for host_path, container_path in volumes.items():
133
+ docker_cmd.extend(["-v", f"{host_path}:{container_path}"])
134
+
135
+ if env:
136
+ for key, value in env.items():
137
+ docker_cmd.extend(["-e", f"{key}={value}"])
138
+
139
+ docker_cmd.extend([config.image, "bash", "-c", command])
140
+
141
+ result = subprocess.run(docker_cmd, capture_output=True, text=True, timeout=600)
142
+ return {
143
+ "stdout": result.stdout,
144
+ "stderr": result.stderr,
145
+ "returncode": result.returncode,
146
+ }
147
+ ```
148
+
149
+ ### Resource Limits
150
+
151
+ Enforce limits at multiple levels to prevent runaway experiments:
152
+
153
+ | Resource | Limit | Enforcement |
154
+ |----------|-------|-------------|
155
+ | CPU time | Per-run timeout | `subprocess.run(..., timeout=...)` (wall-clock) + `RLIMIT_CPU` (CPU seconds) |
156
+ | Memory | Per-process cap | `RLIMIT_AS` or Docker `--memory` |
157
+ | Disk | Scratch space quota | `tmpfs` with size limit |
158
+ | Network | Disabled by default | Docker `--network none` or firewall |
159
+ | Processes | Fork bomb prevention | `RLIMIT_NPROC` |
160
+ | GPU memory | Per-process fraction | `CUDA_VISIBLE_DEVICES` + framework limits |
161
+
162
+ ```python
163
+ # GPU resource limiting
164
+ import os
165
+
166
+ def limit_gpu(device_ids: list[int], memory_fraction: float = 0.5) -> None:
167
+ """Restrict GPU access for the current process."""
168
+ os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in device_ids)
169
+
170
+ try:
171
+ import torch
172
+ if torch.cuda.is_available():
173
+ torch.cuda.set_per_process_memory_fraction(memory_fraction)
174
+ except ImportError:
175
+ pass
176
+ ```
177
+
178
+ ### Credential Isolation
179
+
180
+ Credentials (API keys, database passwords) must never be accessible to experiment code directly:
181
+
182
+ ```python
183
+ # src/security/credentials.py
184
+ import os
185
+ from typing import Any
186
+
187
+ # Credentials that experiment code should NEVER have access to
188
+ BLOCKED_ENV_VARS = {
189
+ "AWS_SECRET_ACCESS_KEY",
190
+ "AWS_SESSION_TOKEN",
191
+ "DATABASE_URL",
192
+ "GITHUB_TOKEN",
193
+ "GIT_AUTHOR_EMAIL",
194
+ }
195
+
196
+ def filtered_environment(allowed_vars: set[str] | None = None) -> dict[str, str]:
197
+ """
198
+ Return a filtered copy of environment variables.
199
+ Blocks all known credential variables.
200
+ Optionally restricts to only explicitly allowed variables.
201
+ """
202
+ env = {}
203
+ for key, value in os.environ.items():
204
+ if key in BLOCKED_ENV_VARS:
205
+ continue
206
+ if allowed_vars is not None and key not in allowed_vars:
207
+ continue
208
+ env[key] = value
209
+ return env
210
+
211
+ def inject_data_credentials(env: dict[str, str],
212
+ data_config: dict[str, Any]) -> dict[str, str]:
213
+ """
214
+ Inject data access credentials into experiment environment.
215
+ These are read-only credentials with minimal scope.
216
+ """
217
+ result = env.copy()
218
+ if "data_source_path" in data_config:
219
+ result["DATA_SOURCE_PATH"] = data_config["data_source_path"]
220
+ if "api_key_env" in data_config:
221
+ key = os.environ.get(data_config["api_key_env"], "")
222
+ if key:
223
+ result["DATA_API_KEY"] = key
224
+ return result
225
+ ```
226
+
227
+ ### Code Injection Prevention
228
+
229
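+ For code-driven experiments where the agent modifies source files, validate the generated code before running it. Pattern-based validation is easy to bypass, so treat it as defense in depth on top of the sandbox, not as a replacement for it: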
230
+
231
+ ```python
232
+ # src/security/code_validator.py
233
+ import ast
234
+ import re
235
+ from pathlib import Path
236
+
237
+ DANGEROUS_PATTERNS = [
238
+ r"os\.system\(",
239
+ r"shutil\.rmtree\(",
240
+ r"__import__\(",
241
+ r"compile\(",
242
+ r"importlib\.import_module",
243
+ ]
244
+
245
+ def validate_experiment_code(file_path: str) -> list[str]:
246
+ """
247
+ Validate that experiment code does not contain dangerous patterns.
248
+ Returns list of violations (empty means safe).
249
+ """
250
+ violations = []
251
+ content = Path(file_path).read_text()
252
+
253
+ # Regex-based pattern detection
254
+ for pattern in DANGEROUS_PATTERNS:
255
+ matches = re.findall(pattern, content)
256
+ if matches:
257
+ violations.append(
258
+ f"Dangerous pattern detected: {pattern} ({len(matches)} occurrences)"
259
+ )
260
+
261
+ # AST-based analysis for import validation
262
+ try:
263
+ tree = ast.parse(content)
264
+ for node in ast.walk(tree):
265
+ if isinstance(node, ast.Import):
266
+ for alias in node.names:
267
+ if alias.name in ("shutil", "ctypes"):
268
+ violations.append(
269
+ f"Forbidden import: {alias.name} (line {node.lineno})"
270
+ )
271
+ elif isinstance(node, ast.ImportFrom):
272
+ if node.module in ("shutil", "ctypes"):
273
+ violations.append(
274
+ f"Forbidden import from: {node.module} (line {node.lineno})"
275
+ )
276
+ except SyntaxError as e:
277
+ violations.append(f"Syntax error in generated code: {e}")
278
+
279
+ return violations
280
+ ```
281
+
282
+ ### Filesystem Access Control
283
+
284
+ Restrict which paths the experiment can read and write:
285
+
286
+ ```python
287
+ # src/security/filesystem.py
288
+ from pathlib import Path
289
+
290
+ class FilesystemPolicy:
291
+ """Define read/write permissions for experiment runs."""
292
+
293
+ def __init__(self, project_root: str):
294
+ self.root = Path(project_root).resolve()
295
+ self.readable = {
296
+ self.root / "src",
297
+ self.root / "configs",
298
+ self.root / "data" / "raw",
299
+ self.root / "data" / "processed",
300
+ self.root / "tests" / "fixtures",
301
+ }
302
+ self.writable = {
303
+ self.root / "results",
304
+ self.root / "src" / "strategies", # Code-driven: agent writes here
305
+ Path("/tmp"),
306
+ }
307
+
308
+ def can_read(self, path: str) -> bool:
309
+ resolved = Path(path).resolve()
310
+ return any(
311
+ resolved == allowed or resolved.is_relative_to(allowed)
312
+ for allowed in self.readable | self.writable
313
+ )
314
+
315
+ def can_write(self, path: str) -> bool:
316
+ resolved = Path(path).resolve()
317
+ return any(
318
+ resolved == allowed or resolved.is_relative_to(allowed)
319
+ for allowed in self.writable
320
+ )
321
+ ```
322
+
323
+ ### Audit Logging
324
+
325
+ Log all agent actions for post-hoc review and anomaly detection:
326
+
327
+ ```python
328
+ # src/security/audit.py
329
+ import json
330
+ import logging
331
+ from datetime import datetime
332
+ from pathlib import Path
333
+
334
+ class AuditLogger:
335
+ """Structured audit log for agent actions."""
336
+
337
+ def __init__(self, log_path: str):
338
+ self.log_path = Path(log_path)
339
+ self.log_path.parent.mkdir(parents=True, exist_ok=True)
340
+ self.logger = logging.getLogger("audit")
341
+
342
+ def log_action(self, action: str, details: dict) -> None:
343
+ record = {
344
+ "timestamp": datetime.now().isoformat(),
345
+ "action": action,
346
+ **details,
347
+ }
348
+ with open(self.log_path, "a") as f:
349
+ f.write(json.dumps(record) + "\n")
350
+ self.logger.info("AUDIT: %s -- %s", action, json.dumps(details))
351
+
352
+ def log_file_write(self, path: str, size: int) -> None:
353
+ self.log_action("file_write", {"path": path, "size_bytes": size})
354
+
355
+ def log_process_run(self, command: list[str], exit_code: int) -> None:
356
+ self.log_action("process_run", {
357
+ "command": command[:5], # Truncate for safety
358
+ "exit_code": exit_code,
359
+ })
360
+
361
+ def log_credential_access(self, credential_name: str) -> None:
362
+ self.log_action("credential_access", {"credential": credential_name})
363
+ ```
364
+
365
+ ### Security Checklist for Research Projects
366
+
367
+ 1. All experiment runs use a sandbox (process limits or container).
368
+ 2. Network access is disabled by default (enable only for API-driven experiments with an explicit allowlist).
369
+ 3. Credentials are injected via environment variables, never in config files or source code.
370
+ 4. Agent-generated code is validated before running.
371
+ 5. Filesystem writes are restricted to designated directories.
372
+ 6. Resource limits are enforced at the OS level, not in application code.
373
+ 7. All agent actions are audit-logged.
374
+ 8. The results directory is treated as untrusted output (sanitize before display).