gdmcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gdmcode-0.1.0.dist-info/METADATA +240 -0
- gdmcode-0.1.0.dist-info/RECORD +131 -0
- gdmcode-0.1.0.dist-info/WHEEL +4 -0
- gdmcode-0.1.0.dist-info/entry_points.txt +2 -0
- src/__init__.py +1 -0
- src/_internal/__init__.py +0 -0
- src/_internal/constants.py +244 -0
- src/_internal/domain_skills.py +339 -0
- src/agent/__init__.py +0 -0
- src/agent/commit_classifier.py +91 -0
- src/agent/context_budget.py +391 -0
- src/agent/daemon.py +681 -0
- src/agent/dag_validator.py +153 -0
- src/agent/debug_loop.py +473 -0
- src/agent/impact_analyzer.py +149 -0
- src/agent/impact_graph.py +117 -0
- src/agent/loop.py +1410 -0
- src/agent/orchestrator.py +141 -0
- src/agent/regression_guard.py +251 -0
- src/agent/review_gate.py +648 -0
- src/agent/risk_scorer.py +169 -0
- src/agent/self_healing.py +145 -0
- src/agent/smart_test_selector.py +89 -0
- src/agent/system_prompt.py +226 -0
- src/agent/task_tracker.py +320 -0
- src/agent/test_validator.py +210 -0
- src/agent/tool_orchestrator.py +402 -0
- src/agent/transcript.py +230 -0
- src/agent/verification_loop.py +133 -0
- src/agent/work_director.py +136 -0
- src/agent/worktree_manager.py +53 -0
- src/artifacts/__init__.py +16 -0
- src/artifacts/artifact_store.py +456 -0
- src/artifacts/verification_graph.py +75 -0
- src/auth.py +411 -0
- src/cli.py +1290 -0
- src/commands.py +1398 -0
- src/config.py +762 -0
- src/cost_tracker.py +348 -0
- src/db/__init__.py +4 -0
- src/db/migrations.py +337 -0
- src/enterprise/__init__.py +3 -0
- src/enterprise/audit_log.py +182 -0
- src/enterprise/identity.py +90 -0
- src/enterprise/rbac.py +100 -0
- src/enterprise/team_config.py +125 -0
- src/enterprise/usage_analytics.py +261 -0
- src/exceptions.py +207 -0
- src/git_workflow.py +651 -0
- src/integrations/__init__.py +6 -0
- src/integrations/github_actions.py +106 -0
- src/integrations/mcp_server.py +333 -0
- src/integrations/sentry_integration.py +100 -0
- src/integrations/sentry_server.py +82 -0
- src/integrations/webhook_security.py +19 -0
- src/main.py +27 -0
- src/memory/__init__.py +0 -0
- src/memory/code_index.py +376 -0
- src/memory/compressor.py +378 -0
- src/memory/context_memory.py +135 -0
- src/memory/continuous_memory.py +234 -0
- src/memory/conventions.py +495 -0
- src/memory/db.py +1119 -0
- src/memory/document_index.py +205 -0
- src/memory/file_cache.py +128 -0
- src/memory/project_scanner.py +178 -0
- src/memory/session_store.py +201 -0
- src/models/__init__.py +0 -0
- src/models/client.py +715 -0
- src/models/definitions.py +459 -0
- src/models/router.py +418 -0
- src/models/schemas.py +389 -0
- src/permissions.py +294 -0
- src/remote/__init__.py +5 -0
- src/remote/command_filter.py +33 -0
- src/remote/models.py +31 -0
- src/remote/permission_handler.py +79 -0
- src/remote/phone_ui.py +48 -0
- src/remote/protocol.py +59 -0
- src/remote/qr.py +65 -0
- src/remote/server.py +586 -0
- src/remote/token_manager.py +61 -0
- src/remote/tunnel.py +212 -0
- src/repl.py +475 -0
- src/runtime/__init__.py +1 -0
- src/runtime/branch_farm.py +372 -0
- src/runtime/replay.py +351 -0
- src/sandbox/__init__.py +2 -0
- src/sandbox/hermetic.py +214 -0
- src/sandbox/policy.py +44 -0
- src/sdk/__init__.py +3 -0
- src/sdk/plugin_base.py +39 -0
- src/sdk/plugin_host.py +100 -0
- src/sdk/plugin_loader.py +101 -0
- src/security.py +409 -0
- src/server/__init__.py +7 -0
- src/server/bridge.py +427 -0
- src/server/bridge_cli.py +103 -0
- src/server/bridge_client.py +170 -0
- src/server/protocol_version.py +103 -0
- src/session/__init__.py +10 -0
- src/session/event_fanout.py +46 -0
- src/session/input_broker.py +38 -0
- src/session/permission_bridge.py +100 -0
- src/tools/__init__.py +160 -0
- src/tools/_atomic.py +72 -0
- src/tools/agent_tools.py +423 -0
- src/tools/ask_user_tool.py +83 -0
- src/tools/bash_tool.py +384 -0
- src/tools/browser_tool.py +352 -0
- src/tools/browser_tools.py +179 -0
- src/tools/dep_tools.py +210 -0
- src/tools/document_reader.py +167 -0
- src/tools/document_tool.py +240 -0
- src/tools/document_writer.py +171 -0
- src/tools/impact_tools.py +240 -0
- src/tools/playwright_tool.py +172 -0
- src/tools/quality_tools.py +366 -0
- src/tools/read_tools.py +318 -0
- src/tools/result_cache.py +157 -0
- src/tools/search_tools.py +310 -0
- src/tools/shell_tools.py +311 -0
- src/tools/write_tools.py +337 -0
- src/voice/__init__.py +25 -0
- src/voice/audio_capture.py +92 -0
- src/voice/audio_playback.py +68 -0
- src/voice/errors.py +14 -0
- src/voice/models.py +35 -0
- src/voice/providers.py +143 -0
- src/voice/vad.py +55 -0
- src/voice/voice_loop.py +156 -0
src/runtime/replay.py
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""Session Replay Engine — runtime-003.
|
|
2
|
+
|
|
3
|
+
Loads any past agent session from the event log and provides:
|
|
4
|
+
- Linear step-forward/back navigation through turns
|
|
5
|
+
- Fork from an arbitrary turn (new session inheriting context up to that point)
|
|
6
|
+
- Side-by-side session comparison (common prefix + divergence summary)
|
|
7
|
+
- Turn annotation (tag turns as good/bad/interesting for eval curation)
|
|
8
|
+
- Export to OpenAI fine-tune JSONL or Anthropic prompt/completion format
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import dataclasses
|
|
13
|
+
import json
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
import uuid
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from src.memory.db import GdmDatabase
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
# Exceptions
|
|
25
|
+
# ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
class ReplayError(Exception):
    """Signal that the replay engine was unable to satisfy a request.

    Used for replay-specific failures such as asking to replay a session
    that has no recorded events.
    """
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
# Data structures
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
@dataclass
class ReplayFrame:
    """Snapshot of a single agent turn as recorded in the event log.

    Field order is part of the positional constructor signature and must not
    be rearranged.
    """

    # --- identity -------------------------------------------------------
    event_id: str
    turn_index: int

    # --- model that produced the turn -------------------------------------
    model: str
    provider: str
    tier: str

    # --- conversation content (either side may be absent) -----------------
    user_message: str | None
    assistant_text: str | None
    tool_calls: list[dict]  # from tool_call_log; include parsed args/result
    patches: list[dict]  # from patch_log

    # --- accounting -------------------------------------------------------
    cost_usd: float
    input_tokens: int
    output_tokens: int
    cached_tokens: int

    # --- bookkeeping ------------------------------------------------------
    ts: str
    annotation: str | None = None  # free-text note; None when not annotated
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class SessionDiff:
    """Holds the frames of two sessions so they can be compared turn by turn."""

    session_a: str
    session_b: str
    frames_a: list[ReplayFrame] = field(default_factory=list)
    frames_b: list[ReplayFrame] = field(default_factory=list)

    def common_prefix_length(self) -> int:
        """Count the leading turns whose ``user_message`` matches in both sessions."""
        shared = 0
        # zip() stops at the shorter session, which bounds the prefix length.
        for left, right in zip(self.frames_a, self.frames_b):
            if left.user_message != right.user_message:
                break
            shared += 1
        return shared

    def divergence_summary(self) -> dict:
        """Summarise shared/unique turn counts and the total cost of each session."""
        shared = self.common_prefix_length()
        unique_a = len(self.frames_a) - shared
        unique_b = len(self.frames_b) - shared
        total_cost_a = sum(frame.cost_usd for frame in self.frames_a)
        total_cost_b = sum(frame.cost_usd for frame in self.frames_b)
        return {
            "common_turns": shared,
            "session_a_unique_turns": unique_a,
            "session_b_unique_turns": unique_b,
            "cost_a_usd": total_cost_a,
            "cost_b_usd": total_cost_b,
        }
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Main class
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
class SessionReplay:
    """Interactive replay engine for a single past session.

    Frames are loaded once with :meth:`load_session`; a cursor then tracks
    which turn is currently being viewed.  All persistence goes through the
    ``db`` object handed to the constructor.
    """

    def __init__(self, db: "GdmDatabase", session_id: str) -> None:
        self._db = db
        self._session_id = session_id
        self._frames: list[ReplayFrame] = []
        self._cursor: int = -1  # -1 = not yet loaded

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------

    def load_session(self) -> None:
        """Load all turns for session_id from the event log.

        On success the cursor is placed on the first turn.

        Raises:
            ReplayError: if no event log rows exist for the session.
        """
        raw = self._db.event_log_load_session(self._session_id)
        if not raw:
            raise ReplayError(
                f"No event log found for session {self._session_id!r}. "
                "Sessions recorded before the event log schema was deployed "
                "cannot be replayed."
            )
        self._frames = [self._to_frame(r) for r in raw]
        self._cursor = 0

    # ------------------------------------------------------------------
    # Navigation
    # ------------------------------------------------------------------

    def step_forward(self) -> ReplayFrame | None:
        """Advance one turn; returns new frame or None if already at end."""
        if self._cursor >= len(self._frames) - 1:
            return None
        self._cursor += 1
        return self._frames[self._cursor]

    def step_back(self) -> ReplayFrame | None:
        """Go back one turn; returns new frame or None if already at start."""
        if self._cursor <= 0:
            return None
        self._cursor -= 1
        return self._frames[self._cursor]

    def show_turn(self, n: int) -> ReplayFrame:
        """Jump to turn n and return its frame.

        Raises:
            IndexError: if the session is not loaded or n is out of range.
        """
        frame = self._frame_at(n)
        self._cursor = n
        return frame

    @property
    def current_frame(self) -> ReplayFrame | None:
        """Frame under the cursor, or None when nothing has been loaded."""
        if self._cursor < 0 or not self._frames:
            return None
        return self._frames[self._cursor]

    @property
    def turn_count(self) -> int:
        """Number of loaded turns (0 before ``load_session``)."""
        return len(self._frames)

    # ------------------------------------------------------------------
    # Fork
    # ------------------------------------------------------------------

    def fork_from(self, turn_index: int, new_session_id: str) -> str:
        """Create a new session inheriting context up to turn N (inclusive).

        Looks up the project_id from the source session, inserts a new row
        in ``sessions``, and writes memory turns for turns 0..turn_index.
        Turns with an empty/None user message or assistant text contribute
        no entry for that side.

        Returns:
            new_session_id (same value passed in, for chaining).

        Raises:
            ReplayError: if the session has not been loaded.
            IndexError: if turn_index is out of range.
        """
        if not self._frames:
            raise ReplayError("Session not loaded; call load_session() first.")
        if not 0 <= turn_index < len(self._frames):
            raise IndexError(
                f"turn_index {turn_index} out of range [0, {len(self._frames) - 1}]"
            )

        # Discover project_id from source session; fall back to "default"
        # when the source session has no row in ``sessions``.
        row = self._db.execute_one(
            "SELECT project_id FROM sessions WHERE session_id=?",
            (self._session_id,),
        )
        project_id = row["project_id"] if row else "default"

        now = datetime.now(timezone.utc).isoformat()
        self._db.execute(
            "INSERT INTO sessions (session_id, project_id, created_at, updated_at) "
            "VALUES (?, ?, ?, ?)",
            (new_session_id, project_id, now, now),
        )

        turns = []
        for frame in self._frames[: turn_index + 1]:
            if frame.user_message:
                turns.append(
                    self._memory_turn("user", frame.user_message, frame.input_tokens)
                )
            if frame.assistant_text:
                turns.append(
                    self._memory_turn(
                        "assistant", frame.assistant_text, frame.output_tokens
                    )
                )
        if turns:
            self._db.memory_save_turns(new_session_id, turns)

        return new_session_id

    # ------------------------------------------------------------------
    # Annotation
    # ------------------------------------------------------------------

    def annotate_turn(self, turn_index: int, note: str) -> None:
        """Persist a text annotation on a turn.

        Stored in the ``annotation`` column of ``session_events`` and kept in
        sync with the in-memory frame.  Annotating does not move the replay
        cursor, so a turn can be tagged without losing the current position.

        Raises:
            IndexError: if the session is not loaded or turn_index is out of
                range.
        """
        frame = self._frame_at(turn_index)
        self._db.execute(
            "UPDATE session_events SET annotation=? WHERE event_id=?",
            (note, frame.event_id),
        )
        frame.annotation = note

    # ------------------------------------------------------------------
    # Compare
    # ------------------------------------------------------------------

    def compare(self, other_session_id: str) -> SessionDiff:
        """Return a side-by-side diff against another session.

        The other session's frames are read from the event log on each call;
        this session contributes whatever frames are currently loaded.
        """
        other_raw = self._db.event_log_load_session(other_session_id)
        other_frames = [self._to_frame(r) for r in other_raw]
        return SessionDiff(
            session_a=self._session_id,
            session_b=other_session_id,
            frames_a=list(self._frames),
            frames_b=other_frames,
        )

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def export(self, fmt: str = "openai") -> list[dict]:
        """Export session as a list of training examples.

        Args:
            fmt: ``"openai"`` (messages array), ``"anthropic"``
                (prompt/completion), or ``"raw"`` (full frame dicts).

        Raises:
            ValueError: for unknown format strings.
        """
        if fmt == "openai":
            return self._export_openai()
        if fmt == "anthropic":
            return self._export_anthropic()
        if fmt == "raw":
            return [dataclasses.asdict(f) for f in self._frames]
        raise ValueError(
            f"Unknown export format {fmt!r}. Use 'openai', 'anthropic', or 'raw'."
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _frame_at(self, n: int) -> ReplayFrame:
        """Return frame n without touching the cursor; IndexError if unavailable."""
        if not self._frames:
            raise IndexError("Session not loaded; call load_session() first.")
        if not 0 <= n < len(self._frames):
            raise IndexError(
                f"Turn {n} out of range [0, {len(self._frames) - 1}]"
            )
        return self._frames[n]

    @staticmethod
    def _memory_turn(role: str, content: str, tokens: int) -> dict:
        """Build one plain-text entry in the shape passed to ``memory_save_turns``."""
        return {
            "role": role,
            "content": content,
            "tokens": tokens,
            "tool_name": None,
            "tool_call_id": None,
            "tool_calls": None,
        }

    @staticmethod
    def _as_json_text(value: object) -> str:
        """Return text unchanged; JSON-encode anything else.

        Re-encoding a value that is already a string would wrap it in an
        extra layer of quotes and escapes in the exported record.
        """
        return value if isinstance(value, str) else json.dumps(value)

    @staticmethod
    def _to_frame(raw: dict) -> ReplayFrame:
        """Convert one event-log row dict into a ReplayFrame.

        ``event_id``, ``turn_index``, ``model``, ``provider`` and ``tier`` are
        required keys; every other key is optional and falls back to an
        empty/zero value when missing or falsy.
        """
        return ReplayFrame(
            event_id=raw["event_id"],
            turn_index=raw["turn_index"],
            model=raw["model"],
            provider=raw["provider"],
            tier=raw["tier"],
            user_message=raw.get("user_message"),
            assistant_text=raw.get("assistant_text"),
            tool_calls=raw.get("tool_calls") or [],
            patches=raw.get("patches") or [],
            cost_usd=raw.get("cost_usd") or 0.0,
            input_tokens=raw.get("input_tokens") or 0,
            output_tokens=raw.get("output_tokens") or 0,
            cached_tokens=raw.get("cached_tokens") or 0,
            ts=raw.get("ts") or "",
            annotation=raw.get("annotation"),
        )

    def _export_openai(self) -> list[dict]:
        """OpenAI fine-tune format: one dict per turn with a 'messages' list.

        Per turn the order is: user message, then each tool call (followed by
        its result when one was recorded), then the assistant text.  Turns
        that produce no messages are skipped.
        """
        records = []
        for frame in self._frames:
            messages = []
            if frame.user_message:
                messages.append({"role": "user", "content": frame.user_message})
            for tc in frame.tool_calls:
                call_id = tc.get("call_id", "")
                messages.append(
                    {
                        "role": "assistant",
                        "content": None,
                        "tool_calls": [
                            {
                                "id": call_id,
                                "type": "function",
                                "function": {
                                    "name": tc.get("tool_name", ""),
                                    "arguments": self._as_json_text(
                                        tc.get("args", {})
                                    ),
                                },
                            }
                        ],
                    }
                )
                if tc.get("result") is not None:
                    messages.append(
                        {
                            "role": "tool",
                            "tool_call_id": call_id,
                            "content": self._as_json_text(tc["result"]),
                        }
                    )
            if frame.assistant_text:
                messages.append(
                    {"role": "assistant", "content": frame.assistant_text}
                )
            if messages:
                records.append({"messages": messages})
        return records

    def _export_anthropic(self) -> list[dict]:
        """Anthropic prompt/completion format: one dict per turn.

        Missing user or assistant text is exported as an empty string.
        """
        return [
            {
                "prompt": frame.user_message or "",
                "completion": frame.assistant_text or "",
            }
            for frame in self._frames
        ]
|
src/sandbox/__init__.py
ADDED
src/sandbox/hermetic.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hermetic sandbox for executing untrusted tool outputs and code snippets.
|
|
3
|
+
|
|
4
|
+
Backends (in order of preference):
|
|
5
|
+
1. docker — full container isolation (preferred)
|
|
6
|
+
2. firejail — Linux namespace sandbox (fallback)
|
|
7
|
+
3. subprocess — plain subprocess with resource limits (last resort, warns)
|
|
8
|
+
|
|
9
|
+
The sandbox intercepts shell_exec and run_code tool calls when enabled.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
import platform
|
|
15
|
+
import shutil
|
|
16
|
+
import subprocess
|
|
17
|
+
import tempfile
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from enum import Enum
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Optional
|
|
22
|
+
|
|
23
|
+
log = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SandboxBackend(str, Enum):
    """Isolation mechanisms a sandbox can run commands with.

    Members are also ``str`` instances, so they compare equal to their
    lowercase names.
    """

    DOCKER = "docker"
    FIREJAIL = "firejail"
    # last resort
    SUBPROCESS = "subprocess"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class SandboxConfig:
    """Tunable limits and isolation switches for a sandbox run.

    Field order is part of the positional constructor signature and must not
    be rearranged.
    """

    # Preferred isolation backend.
    backend: SandboxBackend = SandboxBackend.DOCKER

    # Resource limits.
    timeout_seconds: int = 30
    max_memory_mb: int = 512
    max_cpu_seconds: int = 10

    # Access switches.
    allow_network: bool = False
    allow_write_paths: list[str] = field(default_factory=list)

    # Container settings.
    docker_image: str = "python:3.12-slim"
    read_only_root: bool = True

    # Security flags
    no_new_privs: bool = True
    drop_caps: bool = True
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class SandboxResult:
    """Captured outcome of one sandboxed command."""

    stdout: str
    stderr: str
    exit_code: int
    timed_out: bool = False
    oom_killed: bool = False
    backend_used: SandboxBackend = SandboxBackend.SUBPROCESS

    @property
    def success(self) -> bool:
        """True only for a zero exit code with no timeout and no OOM kill."""
        if self.timed_out or self.oom_killed:
            return False
        return self.exit_code == 0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class SandboxUnavailableError(Exception):
    """Raised when a sandbox backend cannot be used to run a command."""
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class HermeticSandbox:
    """Run shell commands and Python snippets through an isolation backend.

    The backend is chosen once, at construction time, by probing what is
    available on the host (docker, then firejail, then plain subprocess).
    """

    def __init__(self, config: Optional[SandboxConfig] = None):
        """Store *config* (a default ``SandboxConfig`` if omitted) and pick a backend."""
        self._config = config or SandboxConfig()
        self._backend = self._detect_backend()

    def _detect_backend(self) -> SandboxBackend:
        """Return the best usable backend for the requested one.

        Docker is used only when requested, the binary is on PATH and
        ``docker info`` succeeds.  Firejail is the fallback for a docker or
        firejail request on Linux.  Otherwise plain subprocess is used and a
        warning is logged.
        """
        requested = self._config.backend
        if requested == SandboxBackend.DOCKER and shutil.which("docker"):
            # Verify docker daemon is accessible
            try:
                probe = subprocess.run(
                    ["docker", "info"], capture_output=True, timeout=5
                )
            except Exception as exc:
                # Best effort: any probe failure just means "fall back".
                log.debug("docker info probe failed: %s", exc)
            else:
                if probe.returncode == 0:
                    return SandboxBackend.DOCKER
            log.warning("Docker requested but daemon not available; falling back.")
        if requested in (SandboxBackend.DOCKER, SandboxBackend.FIREJAIL):
            if shutil.which("firejail") and platform.system() == "Linux":
                return SandboxBackend.FIREJAIL
        log.warning(
            "No isolation backend available. Using plain subprocess — NOT production-safe."
        )
        return SandboxBackend.SUBPROCESS

    def run(self, command: str, stdin_data: str = "",
            working_dir: Optional[Path] = None) -> SandboxResult:
        """Run a shell *command* on the detected backend.

        Args:
            command: shell command line, executed via ``sh -c`` (``cmd /c``
                for the subprocess backend on Windows).
            stdin_data: text fed to the command's standard input; empty
                means no input.
            working_dir: host directory to run in (mounted read-only at
                ``/workspace`` for docker).

        Raises:
            SandboxUnavailableError: if the docker/firejail binary is missing.
        """
        backend = self._backend
        if backend == SandboxBackend.DOCKER:
            return self._run_docker(command, stdin_data, working_dir)
        if backend == SandboxBackend.FIREJAIL:
            return self._run_firejail(command, stdin_data, working_dir)
        return self._run_subprocess(command, stdin_data, working_dir)

    def run_python(self, code: str) -> SandboxResult:
        """Run Python source *code* inside the sandbox.

        The program is piped to ``python -`` on standard input rather than
        written to a host temp file: a host path is not visible inside a
        docker container, and interpolating it into the shell command would
        need quoting.
        """
        return self.run("python -", stdin_data=code)

    def _docker_args(self, command: str, working_dir: Optional[Path],
                     attach_stdin: bool) -> list[str]:
        """Build the ``docker run`` argv for *command* from the config."""
        cfg = self._config
        args = ["docker", "run", "--rm"]
        if attach_stdin:
            # Without -i docker does not forward our stdin pipe to the container.
            args.append("-i")
        args += [
            "--network", "bridge" if cfg.allow_network else "none",
            "--memory", f"{cfg.max_memory_mb}m",
            "--cpus", "1",
            "--pids-limit", "64",
            "--ulimit", f"cpu={cfg.max_cpu_seconds}",
        ]
        if cfg.read_only_root:
            args.append("--read-only")
        # Flag and value are added together so a disabled option never leaves
        # a dangling "--security-opt" that would swallow the next argument.
        if cfg.no_new_privs:
            args += ["--security-opt", "no-new-privileges"]
        if cfg.drop_caps:
            args += ["--cap-drop", "ALL"]
        if working_dir:
            args += ["-v", f"{working_dir}:/workspace:ro", "-w", "/workspace"]
        args += [cfg.docker_image, "sh", "-c", command]
        return args

    def _run_docker(self, command: str, stdin_data: str,
                    working_dir: Optional[Path]) -> SandboxResult:
        """Run *command* in a throwaway docker container."""
        docker_args = self._docker_args(command, working_dir, bool(stdin_data))
        return self._exec(docker_args, stdin_data, self._config.timeout_seconds,
                          SandboxBackend.DOCKER)

    def _run_firejail(self, command: str, stdin_data: str,
                      working_dir: Optional[Path]) -> SandboxResult:
        """Run *command* under firejail.

        ``working_dir`` is accepted for signature parity with the other
        backends but is not used here.
        """
        cfg = self._config
        fj_args = [
            "firejail", "--quiet", "--private",
            "--noprofile",
            "--no3d", "--nogroups",
        ]
        if not cfg.allow_network:
            fj_args.append("--net=none")
        fj_args += ["sh", "-c", command]
        return self._exec(fj_args, stdin_data, cfg.timeout_seconds,
                          SandboxBackend.FIREJAIL)

    def _run_subprocess(self, command: str, stdin_data: str,
                        working_dir: Optional[Path]) -> SandboxResult:
        """Plain subprocess with basic resource limits (POSIX only).

        Any failure to start or run the process is reported as a result with
        exit code -1 rather than raised.
        """
        cfg = self._config
        on_windows = platform.system() == "Windows"

        def _preexec():
            # Runs in the child just before exec.  Limits are best effort:
            # a platform that rejects them must not prevent the command from
            # running, so failures are deliberately ignored.
            try:
                import resource
                resource.setrlimit(resource.RLIMIT_CPU,
                                   (cfg.max_cpu_seconds, cfg.max_cpu_seconds))
                mem_bytes = cfg.max_memory_mb * 1024 * 1024
                resource.setrlimit(resource.RLIMIT_AS, (mem_bytes, mem_bytes))
            except Exception:
                pass

        argv = ["cmd", "/c", command] if on_windows else ["sh", "-c", command]
        try:
            proc = subprocess.Popen(
                argv,
                stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=str(working_dir) if working_dir else None,
                preexec_fn=None if on_windows else _preexec,
            )
            try:
                stdout, stderr = proc.communicate(
                    input=stdin_data.encode() if stdin_data else None,
                    timeout=cfg.timeout_seconds
                )
                return SandboxResult(
                    stdout=stdout.decode(errors="replace"),
                    stderr=stderr.decode(errors="replace"),
                    exit_code=proc.returncode,
                    backend_used=SandboxBackend.SUBPROCESS,
                )
            except subprocess.TimeoutExpired:
                # Kill, then reap the process and drain its pipes.
                proc.kill()
                proc.communicate()
                return SandboxResult("", "TIMEOUT", -1, timed_out=True,
                                     backend_used=SandboxBackend.SUBPROCESS)
        except Exception as e:
            return SandboxResult("", str(e), -1,
                                 backend_used=SandboxBackend.SUBPROCESS)

    def _exec(self, args: list[str], stdin_data: str, timeout: int,
              backend: SandboxBackend) -> SandboxResult:
        """Run *args* as a child process and capture its output.

        Raises:
            SandboxUnavailableError: if the executable in ``args[0]`` cannot
                be found.  Other failures are returned as a result with exit
                code -1.
        """
        try:
            proc = subprocess.Popen(
                args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            try:
                stdout, stderr = proc.communicate(
                    input=stdin_data.encode() if stdin_data else None,
                    timeout=timeout
                )
                return SandboxResult(
                    stdout=stdout.decode(errors="replace"),
                    stderr=stderr.decode(errors="replace"),
                    exit_code=proc.returncode,
                    backend_used=backend,
                )
            except subprocess.TimeoutExpired:
                # Kill, then reap the process and drain its pipes.
                proc.kill()
                proc.communicate()
                return SandboxResult("", "TIMEOUT", -1, timed_out=True,
                                     backend_used=backend)
        except FileNotFoundError as e:
            raise SandboxUnavailableError(f"Backend binary not found: {e}") from e
        except Exception as e:
            return SandboxResult("", str(e), -1, backend_used=backend)
|
src/sandbox/policy.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Sandbox execution policy: which tool calls should be sandboxed."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Optional
|
|
5
|
+
from src.sandbox.hermetic import HermeticSandbox, SandboxConfig, SandboxBackend
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class SandboxPolicy:
    """Decides which tool calls must run inside the sandbox.

    ``None`` for ``sandbox_tool_calls`` or ``config`` is only a placeholder:
    ``__post_init__`` replaces it with a default, so after construction both
    attributes are always populated.
    """

    enabled: bool = False
    # None = use the default tool list filled in by __post_init__
    sandbox_tool_calls: Optional[list[str]] = None
    bypass_for_autonomy_level: int = 5  # L5 admin bypass
    # None = use a default SandboxConfig()
    config: Optional[SandboxConfig] = None

    def __post_init__(self) -> None:
        """Fill in defaults for fields that were left as None."""
        if self.sandbox_tool_calls is None:
            self.sandbox_tool_calls = ["shell_exec", "run_code", "python_repl"]
        if self.config is None:
            self.config = SandboxConfig()

    def should_sandbox(self, tool_name: str, autonomy_level: int = 0) -> bool:
        """Return True when *tool_name* must be executed in the sandbox.

        Sandboxing is skipped entirely when the policy is disabled or when
        *autonomy_level* reaches ``bypass_for_autonomy_level``; otherwise only
        tools listed in ``sandbox_tool_calls`` are sandboxed.
        """
        if not self.enabled:
            return False
        if autonomy_level >= self.bypass_for_autonomy_level:
            return False
        return tool_name in self.sandbox_tool_calls
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Process-wide sandbox state; both stay None until set_sandbox_policy() runs.
_global_policy: Optional[SandboxPolicy] = None
_global_sandbox: Optional[HermeticSandbox] = None


def set_sandbox_policy(policy: SandboxPolicy) -> None:
    """Install *policy* as the active policy and rebuild the shared sandbox.

    A sandbox instance is created only for an enabled policy; a disabled
    policy clears any previously created sandbox.
    """
    global _global_policy, _global_sandbox
    _global_policy = policy
    if policy.enabled:
        _global_sandbox = HermeticSandbox(policy.config)
    else:
        _global_sandbox = None


def get_sandbox() -> Optional[HermeticSandbox]:
    """Return the shared sandbox, or None when no enabled policy is installed."""
    return _global_sandbox


def get_policy() -> Optional[SandboxPolicy]:
    """Return the active policy, or None if none has been installed."""
    return _global_policy
|
src/sdk/__init__.py
ADDED
src/sdk/plugin_base.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from typing import Callable, Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class PermissionManifest:
    """Capabilities a plugin or tool declares; only the read flags default to True."""

    file_read: bool = True
    file_write: bool = False
    network: bool = False
    git_read: bool = True
    git_write: bool = False


class GdmPlugin:
    """Base class for gdm plugins.

    Subclasses override the class attributes below and, optionally, the
    ``on_load`` / ``on_unload`` hooks, which do nothing by default.
    """

    name: str = ""
    version: str = "0.0.0"
    # GdmPlugin is a plain class, not a dataclass, so dataclasses.field()
    # would leave a Field object here instead of a manifest.  Use a real
    # instance; __init_subclass__ gives every subclass its own copy.
    permissions: PermissionManifest = PermissionManifest()

    def __init_subclass__(cls, **kwargs) -> None:
        """Give each subclass a private copy of the manifest it inherits.

        Without the copy, mutating ``permissions`` on one plugin class would
        also change it for the base class and every sibling plugin.
        """
        super().__init_subclass__(**kwargs)
        from dataclasses import replace

        inherited = cls.permissions
        if "permissions" not in cls.__dict__ and isinstance(
            inherited, PermissionManifest
        ):
            cls.permissions = replace(inherited)

    def on_load(self, context: dict) -> None:
        """Hook invoked with a context dict on load; no-op by default."""
        pass

    def on_unload(self) -> None:
        """Hook invoked on unload; no-op by default."""
        pass
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def tool(name: str, description: str, permissions: Optional[PermissionManifest] = None):
    """Build a decorator that tags a function as a gdm tool.

    The function is returned unchanged apart from a ``_gdm_tool`` attribute
    holding its name, description and (possibly None) permissions.
    """
    def attach_metadata(fn: Callable) -> Callable:
        # A fresh dict per decorated function, so reusing the decorator never
        # shares metadata between functions.
        setattr(
            fn,
            "_gdm_tool",
            dict(name=name, description=description, permissions=permissions),
        )
        return fn

    return attach_metadata
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def provider(name: str):
    """Build a class decorator that tags a class as a model provider.

    The class is returned unchanged apart from a ``_gdm_provider`` attribute
    holding the given name.
    """
    def mark_provider(cls):
        setattr(cls, "_gdm_provider", {"name": name})
        return cls

    return mark_provider
|