groknroll 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groknroll/__init__.py +36 -0
- groknroll/__main__.py +9 -0
- groknroll/agents/__init__.py +18 -0
- groknroll/agents/agent_manager.py +187 -0
- groknroll/agents/base_agent.py +118 -0
- groknroll/agents/build_agent.py +231 -0
- groknroll/agents/plan_agent.py +215 -0
- groknroll/cli/__init__.py +7 -0
- groknroll/cli/enhanced_cli.py +372 -0
- groknroll/cli/large_codebase_cli.py +413 -0
- groknroll/cli/main.py +331 -0
- groknroll/cli/rlm_commands.py +258 -0
- groknroll/clients/__init__.py +63 -0
- groknroll/clients/anthropic.py +112 -0
- groknroll/clients/azure_openai.py +142 -0
- groknroll/clients/base_lm.py +33 -0
- groknroll/clients/gemini.py +162 -0
- groknroll/clients/litellm.py +105 -0
- groknroll/clients/openai.py +129 -0
- groknroll/clients/portkey.py +94 -0
- groknroll/core/__init__.py +9 -0
- groknroll/core/agent.py +339 -0
- groknroll/core/comms_utils.py +264 -0
- groknroll/core/context.py +251 -0
- groknroll/core/exceptions.py +181 -0
- groknroll/core/large_codebase.py +564 -0
- groknroll/core/lm_handler.py +206 -0
- groknroll/core/rlm.py +446 -0
- groknroll/core/rlm_codebase.py +448 -0
- groknroll/core/rlm_integration.py +256 -0
- groknroll/core/types.py +276 -0
- groknroll/environments/__init__.py +34 -0
- groknroll/environments/base_env.py +182 -0
- groknroll/environments/constants.py +32 -0
- groknroll/environments/docker_repl.py +336 -0
- groknroll/environments/local_repl.py +388 -0
- groknroll/environments/modal_repl.py +502 -0
- groknroll/environments/prime_repl.py +588 -0
- groknroll/logger/__init__.py +4 -0
- groknroll/logger/rlm_logger.py +63 -0
- groknroll/logger/verbose.py +393 -0
- groknroll/operations/__init__.py +15 -0
- groknroll/operations/bash_ops.py +447 -0
- groknroll/operations/file_ops.py +473 -0
- groknroll/operations/git_ops.py +620 -0
- groknroll/oracle/__init__.py +11 -0
- groknroll/oracle/codebase_indexer.py +238 -0
- groknroll/oracle/oracle_agent.py +278 -0
- groknroll/setup.py +34 -0
- groknroll/storage/__init__.py +14 -0
- groknroll/storage/database.py +272 -0
- groknroll/storage/models.py +128 -0
- groknroll/utils/__init__.py +0 -0
- groknroll/utils/parsing.py +168 -0
- groknroll/utils/prompts.py +146 -0
- groknroll/utils/rlm_utils.py +19 -0
- groknroll-2.0.0.dist-info/METADATA +246 -0
- groknroll-2.0.0.dist-info/RECORD +62 -0
- groknroll-2.0.0.dist-info/WHEEL +5 -0
- groknroll-2.0.0.dist-info/entry_points.txt +3 -0
- groknroll-2.0.0.dist-info/licenses/LICENSE +21 -0
- groknroll-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
import base64
|
|
2
|
+
import json
|
|
3
|
+
import textwrap
|
|
4
|
+
import threading
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
import modal
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
from groknroll.core.comms_utils import LMRequest, send_lm_request, send_lm_request_batched
|
|
11
|
+
from groknroll.core.types import REPLResult, RLMChatCompletion
|
|
12
|
+
from groknroll.environments.base_env import IsolatedEnv
|
|
13
|
+
from groknroll.environments.constants import APT_PACKAGES, PIP_PACKAGES
|
|
14
|
+
|
|
15
|
+
# =============================================================================
|
|
16
|
+
# Default Modal Image
|
|
17
|
+
# =============================================================================
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_default_image() -> modal.Image:
    """
    Assemble the stock Modal image for sandboxes.

    The image starts from Debian slim with Python 3.11, then installs the
    system packages listed in ``APT_PACKAGES`` followed by the Python
    packages listed in ``PIP_PACKAGES``.
    """
    base_image = modal.Image.debian_slim(python_version="3.11")
    with_system_packages = base_image.apt_install(*APT_PACKAGES)
    return with_system_packages.pip_install(*PIP_PACKAGES)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# =============================================================================
|
|
33
|
+
# Broker Server Script (runs inside sandbox, handles LLM request queue)
|
|
34
|
+
# =============================================================================
|
|
35
|
+
|
|
36
|
+
# Source text of the Flask broker that ModalREPL.setup() launches inside the
# sandbox with ``python -c``. Endpoints it defines:
#   GET  /health   -> {"status": "ok"}
#   POST /enqueue  -> stores the JSON body under a fresh UUID, blocks on a
#                     threading.Event for up to 300 s, then returns the stored
#                     response, or a 504 JSON error if none was attached
#   GET  /pending  -> lists every queued request that has no response yet
#   POST /respond  -> attaches a response to a queued request and wakes its
#                     waiter; 404 JSON error if the id is not queued
# The server binds 0.0.0.0:8080 with threaded=True; that literal port has to
# stay equal to ModalREPL.BROKER_PORT.
_BROKER_SCRIPT = textwrap.dedent(
    '''
import json
import threading
import uuid
from flask import Flask, request, jsonify

app = Flask(__name__)

# Request queue: {request_id: {"request": {...}, "response": None, "event": Event}}
pending_requests = {}
lock = threading.Lock()

@app.route("/health")
def health():
    return jsonify({"status": "ok"})

@app.route("/enqueue", methods=["POST"])
def enqueue():
    """Called by sandbox code to submit an LLM request and wait for response."""
    data = request.json
    request_id = str(uuid.uuid4())
    event = threading.Event()

    with lock:
        pending_requests[request_id] = {
            "request": data,
            "response": None,
            "event": event,
        }

    # Wait for response (with timeout)
    event.wait(timeout=300)

    with lock:
        entry = pending_requests.pop(request_id, None)

    if entry and entry["response"] is not None:
        return jsonify(entry["response"])
    else:
        return jsonify({"error": "Request timed out"}), 504

@app.route("/pending")
def get_pending():
    """Called by ModalREPL to get pending requests."""
    with lock:
        pending = [
            {"id": rid, "request": entry["request"]}
            for rid, entry in pending_requests.items()
            if entry["response"] is None
        ]
    return jsonify({"pending": pending})

@app.route("/respond", methods=["POST"])
def respond():
    """Called by ModalREPL to submit a response."""
    data = request.json
    request_id = data.get("id")
    response = data.get("response")

    with lock:
        if request_id in pending_requests:
            pending_requests[request_id]["response"] = response
            pending_requests[request_id]["event"].set()
            return jsonify({"status": "ok"})

    return jsonify({"error": "Request not found"}), 404

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8080, threaded=True)
'''
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# =============================================================================
|
|
111
|
+
# Execution Script (runs inside the sandbox for each code block)
|
|
112
|
+
# =============================================================================
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _build_exec_script(code: str, broker_port: int = 8080, depth: int = 1) -> str:
    """
    Build a script that executes code with state persistence.
    LLM queries go through the local broker server.

    The returned script, when run with ``python -c``:
    - loads previously saved variables from ``/tmp/rlm_state.dill``
      (using ``dill`` when importable, otherwise ``pickle``),
    - exposes ``llm_query``, ``llm_query_batched`` and ``FINAL_VAR`` to the
      executed code,
    - runs ``code`` with stdout/stderr captured,
    - saves every picklable variable whose name does not start with ``_``,
    - prints one JSON object with ``stdout``, ``stderr`` and ``locals``
      (each local rendered with ``repr``).

    Args:
        code: Python source to execute. It is embedded base64-encoded so that
            quotes and newlines in it cannot break the generated script.
        broker_port: Port of the broker on 127.0.0.1 that LLM queries are
            posted to.
        depth: Value sent as the ``"depth"`` field of every LLM request.

    Returns:
        The source of the script as a string.
    """
    code_b64 = base64.b64encode(code.encode()).decode()

    # NOTE: the template is an f-string, so literal braces meant for the
    # generated script are doubled. The helper functions in it catch
    # ``Exception`` rather than using a bare ``except`` so that SystemExit and
    # KeyboardInterrupt are not silently swallowed.
    return textwrap.dedent(
        f'''
import sys
import io
import json
import base64
import traceback
import os
import requests

try:
    import dill
except ImportError:
    import pickle as dill

# =============================================================================
# LLM Query Functions (via local broker)
# =============================================================================

BROKER_URL = "http://127.0.0.1:{broker_port}"

def llm_query(prompt, model=None):
    """Query the LM via the broker."""
    try:
        response = requests.post(
            f"{{BROKER_URL}}/enqueue",
            json={{"type": "single", "prompt": prompt, "model": model, "depth": {depth}}},
            timeout=300,
        )
        data = response.json()
        if data.get("error"):
            return f"Error: {{data['error']}}"
        return data.get("response", "Error: No response")
    except Exception as e:
        return f"Error: LM query failed - {{e}}"


def llm_query_batched(prompts, model=None):
    """Query the LM with multiple prompts."""
    try:
        response = requests.post(
            f"{{BROKER_URL}}/enqueue",
            json={{"type": "batched", "prompts": prompts, "model": model, "depth": {depth}}},
            timeout=300,
        )
        data = response.json()
        if data.get("error"):
            return [f"Error: {{data['error']}}"] * len(prompts)
        return data.get("responses", ["Error: No response"] * len(prompts))
    except Exception as e:
        return [f"Error: LM query failed - {{e}}"] * len(prompts)


# =============================================================================
# State Management
# =============================================================================

STATE_FILE = "/tmp/rlm_state.dill"

def load_state():
    if os.path.exists(STATE_FILE):
        try:
            with open(STATE_FILE, "rb") as f:
                return dill.load(f)
        except Exception:
            pass
    return {{}}

def save_state(state):
    clean_state = {{}}
    for k, v in state.items():
        if k.startswith("_"):
            continue
        try:
            dill.dumps(v)
            clean_state[k] = v
        except Exception:
            pass
    with open(STATE_FILE, "wb") as f:
        dill.dump(clean_state, f)

def serialize_locals(state):
    result = {{}}
    for k, v in state.items():
        if k.startswith("_"):
            continue
        try:
            result[k] = repr(v)
        except Exception:
            result[k] = f"<{{type(v).__name__}}>"
    return result

# =============================================================================
# Execution
# =============================================================================

_locals = load_state()

def FINAL_VAR(variable_name):
    variable_name = variable_name.strip().strip("\\"\\'")
    if variable_name in _locals:
        return str(_locals[variable_name])
    return f"Error: Variable '{{variable_name}}' not found"

_globals = {{
    "__builtins__": __builtins__,
    "__name__": "__main__",
    "llm_query": llm_query,
    "llm_query_batched": llm_query_batched,
    "FINAL_VAR": FINAL_VAR,
}}

code = base64.b64decode("{code_b64}").decode()

stdout_buf = io.StringIO()
stderr_buf = io.StringIO()
old_stdout, old_stderr = sys.stdout, sys.stderr

try:
    sys.stdout = stdout_buf
    sys.stderr = stderr_buf
    combined = {{**_globals, **_locals}}
    exec(code, combined, combined)
    for key, value in combined.items():
        if key not in _globals and not key.startswith("_"):
            _locals[key] = value
except Exception as e:
    traceback.print_exc(file=stderr_buf)
finally:
    sys.stdout = old_stdout
    sys.stderr = old_stderr

save_state(_locals)

result = {{
    "stdout": stdout_buf.getvalue(),
    "stderr": stderr_buf.getvalue(),
    "locals": serialize_locals(_locals),
}}
print(json.dumps(result))
'''
    )
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class ModalREPL(IsolatedEnv):
    """
    Modal REPL environment that runs Python code in a Modal Sandbox.

    Uses Modal tunnels for LLM communication:
    - Sandbox runs a broker server exposed via encrypted_ports
    - ModalREPL polls the broker for pending LLM requests
    - ModalREPL forwards requests to the LM handler and posts responses back
    """

    # Must stay equal to the port hard-coded in _BROKER_SCRIPT's app.run(...).
    BROKER_PORT = 8080

    def __init__(
        self,
        app_name: str = "rlm-sandbox",
        image: modal.Image | None = None,
        timeout: int = 600,
        lm_handler_address: tuple[str, int] | None = None,
        context_payload: dict | list | str | None = None,
        setup_code: str | None = None,
        persistent: bool = False,
        depth: int = 1,
        **kwargs,
    ):
        """
        Create the sandbox, then optionally load context and run setup code.

        Args:
            app_name: Name passed to ``modal.App.lookup`` (created if missing).
            image: Image for the sandbox; ``get_default_image()`` when falsy.
            timeout: Passed as ``timeout`` to ``modal.Sandbox.create``.
            lm_handler_address: Address handed to ``send_lm_request`` /
                ``send_lm_request_batched``. The broker poller is only
                started when this is set.
            context_payload: If not None, loaded via ``load_context``.
            setup_code: If truthy, executed via ``execute_code`` after the
                context has been loaded.
            persistent: Must be False.
            depth: Forwarded to the base class constructor.
            **kwargs: Forwarded to the base class constructor.

        Raises:
            NotImplementedError: If ``persistent`` is True.
        """
        if persistent:
            raise NotImplementedError(
                "Persistent REPLs are currently not supported for environment: ModalREPL"
            )
        super().__init__(persistent=persistent, depth=depth, **kwargs)

        self.app_name = app_name
        self.timeout = timeout
        self.lm_handler_address = lm_handler_address

        self.image = image or get_default_image()

        self.app = None
        self.sandbox = None
        self.broker_process = None
        self.broker_url: str | None = None
        self.poller_thread: threading.Thread | None = None
        self.poller_stop = threading.Event()
        # Completions recorded by the poller thread while code is executing;
        # guarded by _calls_lock because execute_code reads it from the
        # caller's thread.
        self.pending_llm_calls: list[RLMChatCompletion] = []
        self._calls_lock = threading.Lock()

        self.setup()

        if context_payload is not None:
            self.load_context(context_payload)

        if setup_code:
            self.execute_code(setup_code)

    def setup(self):
        """Create the Modal app, sandbox, broker, and start polling."""
        self.app = modal.App.lookup(self.app_name, create_if_missing=True)

        # Create sandbox with encrypted port for broker
        self.sandbox = modal.Sandbox.create(
            app=self.app,
            image=self.image,
            timeout=self.timeout,
            encrypted_ports=[self.BROKER_PORT],
        )

        # Start the broker server in the sandbox
        self.broker_process = self.sandbox.exec(
            "python",
            "-c",
            _BROKER_SCRIPT,
        )

        # Wait for broker to be ready (fixed delay; readiness is not verified)
        time.sleep(2)

        # Get the tunnel URL
        tunnels = self.sandbox.tunnels()
        if self.BROKER_PORT in tunnels:
            self.broker_url = tunnels[self.BROKER_PORT].url

        # Start polling thread if we have an LM handler
        if self.lm_handler_address and self.broker_url:
            self.poller_stop.clear()
            self.poller_thread = threading.Thread(target=self._poll_broker, daemon=True)
            self.poller_thread.start()

    def _poll_broker(self):
        """
        Poll the broker for pending LLM requests and handle them.

        Runs until ``poller_stop`` is set, sleeping 0.1 s between rounds.
        Every pending request gets a reply: if handling it raises, an
        ``{"error": ...}`` payload is posted instead, so the code waiting in
        the sandbox is unblocked rather than left waiting for the broker
        timeout while the same request is retried on every round.
        """
        while not self.poller_stop.is_set():
            try:
                # Get pending requests
                resp = requests.get(
                    f"{self.broker_url}/pending",
                    timeout=5,
                )
                pending = resp.json().get("pending", [])

                for item in pending:
                    request_id = item["id"]

                    # Handle the request; convert failures into an error reply
                    try:
                        response = self._handle_llm_request(item["request"])
                    except Exception as exc:
                        response = {"error": f"LM request failed - {exc}"}

                    # Send response back
                    requests.post(
                        f"{self.broker_url}/respond",
                        json={"id": request_id, "response": response},
                        timeout=10,
                    )

            except Exception:
                # Deliberately best-effort: broker/network errors (including
                # requests.exceptions.RequestException) must not kill the
                # poller; the next round simply tries again.
                pass

            time.sleep(0.1)

    def _handle_llm_request(self, req_data: dict) -> dict:
        """
        Handle an LLM request from the sandbox.

        Args:
            req_data: Request body queued by the sandbox. ``"type"`` selects
                ``"single"`` (uses ``"prompt"``) or ``"batched"`` (uses
                ``"prompts"``); ``"model"`` is forwarded. The depth sent to
                the LM handler is ``self.depth``, not the request's own
                ``"depth"`` field.

        Returns:
            ``{"response": str}`` for a successful single request,
            ``{"responses": [...]}`` for a batched request (failed items are
            rendered as ``"Error: ..."`` strings), or ``{"error": ...}`` for a
            failed single request or an unknown type.
        """
        req_type = req_data.get("type")
        model = req_data.get("model")

        if req_type == "single":
            prompt = req_data.get("prompt")
            request = LMRequest(prompt=prompt, model=model, depth=self.depth)
            response = send_lm_request(self.lm_handler_address, request)

            if not response.success:
                return {"error": response.error}

            # Track the call
            with self._calls_lock:
                self.pending_llm_calls.append(response.chat_completion)

            return {"response": response.chat_completion.response}

        elif req_type == "batched":
            prompts = req_data.get("prompts", [])
            responses = send_lm_request_batched(
                self.lm_handler_address, prompts, model=model, depth=self.depth
            )

            results = []
            for resp in responses:
                if not resp.success:
                    results.append(f"Error: {resp.error}")
                else:
                    with self._calls_lock:
                        self.pending_llm_calls.append(resp.chat_completion)
                    results.append(resp.chat_completion.response)

            return {"responses": results}

        return {"error": "Unknown request type"}

    def load_context(self, context_payload: dict | list | str):
        """
        Load context into the sandbox environment.

        Binds the payload to the variable ``context`` by executing an
        assignment in the sandbox. Strings are embedded with ``repr`` so any
        content (quotes, backslashes, carriage returns, a trailing ``"``)
        round-trips exactly; other payloads are JSON-encoded and decoded
        again inside the sandbox.
        """
        if isinstance(context_payload, str):
            # repr() always yields a valid literal; hand-built triple-quoted
            # strings break when the payload ends with a double quote.
            context_code = f"context = {context_payload!r}"
        else:
            context_json = json.dumps(context_payload)
            context_code = f"import json; context = json.loads({context_json!r})"

        self.execute_code(context_code)

    def execute_code(self, code: str) -> REPLResult:
        """
        Execute code in the Modal sandbox and return result.

        The code is wrapped by ``_build_exec_script`` and run with
        ``python -c``. The last line of the process's stdout is parsed as the
        JSON result; if that fails, the raw stdout/stderr are returned with
        empty locals. ``rlm_calls`` holds the completions recorded by the
        poller since this call started.
        """
        start_time = time.perf_counter()

        # Clear pending LLM calls
        with self._calls_lock:
            self.pending_llm_calls.clear()

        # Build and execute the script
        script = _build_exec_script(code, self.BROKER_PORT, self.depth)
        process = self.sandbox.exec("python", "-c", script)

        # Read output
        stdout = process.stdout.read()
        stderr = process.stderr.read()

        # Collect LLM calls made during this execution
        with self._calls_lock:
            pending_calls = self.pending_llm_calls.copy()
            self.pending_llm_calls.clear()

        execution_time = time.perf_counter() - start_time

        # Parse the JSON result
        try:
            lines = stdout.strip().split("\n")
            result_json = lines[-1] if lines else "{}"
            result = json.loads(result_json)

            return REPLResult(
                stdout=result.get("stdout", ""),
                stderr=result.get("stderr", "") + stderr,
                locals=result.get("locals", {}),
                execution_time=execution_time,
                rlm_calls=pending_calls,
            )
        except json.JSONDecodeError:
            return REPLResult(
                stdout=stdout,
                stderr=stderr or "Failed to parse execution result",
                locals={},
                execution_time=execution_time,
                rlm_calls=pending_calls,
            )

    def cleanup(self):
        """
        Terminate the sandbox and stop polling.

        Safe to call repeatedly and on a partially constructed instance
        (``__del__`` runs even when ``__init__`` raised before the attributes
        were assigned), hence the ``getattr`` lookups.
        """
        # Stop the poller thread
        poller = getattr(self, "poller_thread", None)
        if poller is not None:
            self.poller_stop.set()
            poller.join(timeout=2)
            self.poller_thread = None

        sandbox = getattr(self, "sandbox", None)
        if sandbox is not None:
            try:
                sandbox.terminate()
            except Exception:
                # Best-effort: there is nothing useful to do if terminating
                # the sandbox fails.
                pass
            self.sandbox = None

    def __enter__(self):
        """Return the environment itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Clean up on leaving the ``with`` block; exceptions propagate."""
        self.cleanup()
        return False

    def __del__(self):
        """Best-effort cleanup at garbage collection; never raises."""
        try:
            self.cleanup()
        except Exception:
            pass
|