ralphx 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ralphx/__init__.py +1 -1
- ralphx/adapters/base.py +8 -0
- ralphx/adapters/claude_cli.py +377 -452
- ralphx/api/routes/items.py +4 -0
- ralphx/api/routes/loops.py +101 -15
- ralphx/api/routes/planning.py +1 -1
- ralphx/api/routes/stream.py +104 -59
- ralphx/api/routes/templates.py +1 -0
- ralphx/api/routes/workflows.py +2 -2
- ralphx/core/checkpoint.py +118 -0
- ralphx/core/executor.py +134 -81
- ralphx/core/loop_templates.py +33 -14
- ralphx/core/planning_service.py +1 -1
- ralphx/core/project_db.py +66 -7
- ralphx/core/session.py +62 -10
- ralphx/core/templates.py +74 -87
- ralphx/core/workflow_executor.py +3 -0
- ralphx/mcp/tools/workflows.py +2 -2
- ralphx/models/loop.py +1 -1
- ralphx/models/session.py +5 -0
- ralphx/static/assets/index-DnihHetG.js +265 -0
- ralphx/static/assets/index-DnihHetG.js.map +1 -0
- ralphx/static/assets/index-nIDWmtzm.css +1 -0
- ralphx/static/index.html +2 -2
- {ralphx-0.4.0.dist-info → ralphx-0.4.1.dist-info}/METADATA +1 -1
- {ralphx-0.4.0.dist-info → ralphx-0.4.1.dist-info}/RECORD +28 -28
- ralphx/static/assets/index-BuLI7ffn.css +0 -1
- ralphx/static/assets/index-DWvlqOTb.js +0 -264
- ralphx/static/assets/index-DWvlqOTb.js.map +0 -1
- {ralphx-0.4.0.dist-info → ralphx-0.4.1.dist-info}/WHEEL +0 -0
- {ralphx-0.4.0.dist-info → ralphx-0.4.1.dist-info}/entry_points.txt +0 -0
ralphx/api/routes/items.py
CHANGED
|
@@ -139,6 +139,8 @@ async def list_items(
|
|
|
139
139
|
source_step_id: Optional[int] = Query(None, description="Filter by source step"),
|
|
140
140
|
limit: int = Query(50, ge=1, le=1000, description="Items per page"),
|
|
141
141
|
offset: int = Query(0, ge=0, description="Offset for pagination"),
|
|
142
|
+
sort_by: str = Query("created_at", description="Column to sort by"),
|
|
143
|
+
sort_order: str = Query("desc", description="Sort order: asc or desc"),
|
|
142
144
|
):
|
|
143
145
|
"""List work items with optional filtering."""
|
|
144
146
|
manager, project, project_db = get_project(slug)
|
|
@@ -151,6 +153,8 @@ async def list_items(
|
|
|
151
153
|
source_step_id=source_step_id,
|
|
152
154
|
limit=limit,
|
|
153
155
|
offset=offset,
|
|
156
|
+
sort_by=sort_by,
|
|
157
|
+
sort_order=sort_order,
|
|
154
158
|
)
|
|
155
159
|
|
|
156
160
|
# Convert to response models
|
ralphx/api/routes/loops.py
CHANGED
|
@@ -17,6 +17,7 @@ from ralphx.core.project_db import ProjectDatabase
|
|
|
17
17
|
from ralphx.models.loop import LoopConfig, LoopType, ModeSelectionStrategy, ItemTypes
|
|
18
18
|
from ralphx.models.run import Run, RunStatus
|
|
19
19
|
from ralphx.core.logger import loop_log
|
|
20
|
+
from ralphx.core.checkpoint import kill_orphan_process
|
|
20
21
|
|
|
21
22
|
router = APIRouter()
|
|
22
23
|
|
|
@@ -54,6 +55,9 @@ def detect_source_cycle(
|
|
|
54
55
|
# Store for running loops
|
|
55
56
|
_running_loops: dict[str, LoopExecutor] = {}
|
|
56
57
|
|
|
58
|
+
# Prevent concurrent stop attempts
|
|
59
|
+
_stopping_loops: set[str] = set()
|
|
60
|
+
|
|
57
61
|
# Security: Validate loop names to prevent path traversal
|
|
58
62
|
LOOP_NAME_PATTERN = re.compile(r'^[a-zA-Z0-9_-]+$')
|
|
59
63
|
|
|
@@ -428,34 +432,110 @@ async def start_loop(
|
|
|
428
432
|
|
|
429
433
|
@router.post("/{slug}/loops/{loop_name}/stop")
|
|
430
434
|
async def stop_loop(slug: str, loop_name: str):
|
|
431
|
-
"""Stop a running loop.
|
|
432
|
-
|
|
433
|
-
|
|
435
|
+
"""Stop a running loop.
|
|
436
|
+
|
|
437
|
+
Attempts to stop via executor if in memory, otherwise falls back
|
|
438
|
+
to killing via PID from database (for orphaned processes after
|
|
439
|
+
server restart/hot-reload).
|
|
440
|
+
"""
|
|
441
|
+
manager, project, project_db = get_managers(slug)
|
|
434
442
|
|
|
435
443
|
key = f"{slug}:{loop_name}"
|
|
436
|
-
executor = _running_loops.get(key)
|
|
437
444
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
detail=f"Loop {loop_name} is not running",
|
|
442
|
-
)
|
|
445
|
+
# Prevent concurrent stop attempts
|
|
446
|
+
if key in _stopping_loops:
|
|
447
|
+
return {"message": f"Stop already in progress for {loop_name}"}
|
|
443
448
|
|
|
444
|
-
|
|
449
|
+
_stopping_loops.add(key)
|
|
450
|
+
try:
|
|
451
|
+
# Try 1: Stop via executor (normal case)
|
|
452
|
+
executor = _running_loops.get(key)
|
|
453
|
+
if executor:
|
|
454
|
+
await executor.stop()
|
|
455
|
+
return {
|
|
456
|
+
"message": f"Stop signal sent to {loop_name}",
|
|
457
|
+
"method": "executor",
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
# Try 2: Kill via PID (orphan case after server restart)
|
|
461
|
+
runs = project_db.list_runs(loop_name=loop_name, status=["running", "paused"])
|
|
462
|
+
if not runs:
|
|
463
|
+
raise HTTPException(
|
|
464
|
+
status_code=status.HTTP_404_NOT_FOUND,
|
|
465
|
+
detail=f"Loop {loop_name} is not running",
|
|
466
|
+
)
|
|
445
467
|
|
|
446
|
-
|
|
468
|
+
# Get most recent running run
|
|
469
|
+
run = runs[0]
|
|
470
|
+
pid = run.get("executor_pid")
|
|
471
|
+
|
|
472
|
+
if not pid:
|
|
473
|
+
# No PID recorded - can't kill, just mark as aborted
|
|
474
|
+
project_db.update_run(
|
|
475
|
+
run["id"],
|
|
476
|
+
status="aborted",
|
|
477
|
+
completed_at=datetime.utcnow().isoformat(),
|
|
478
|
+
error_message="Stopped by user (no PID available for orphan process)",
|
|
479
|
+
)
|
|
480
|
+
return {
|
|
481
|
+
"message": f"Marked {loop_name} as aborted (no PID available)",
|
|
482
|
+
"method": "database_only",
|
|
483
|
+
"warning": "Process may still be running",
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
# Kill the orphan process
|
|
487
|
+
success, reason = await kill_orphan_process(pid)
|
|
488
|
+
|
|
489
|
+
# Update database regardless of kill result
|
|
490
|
+
if success:
|
|
491
|
+
error_msg = f"Killed orphan process (PID {pid}) after server restart"
|
|
492
|
+
if reason == "already_dead":
|
|
493
|
+
error_msg = f"Orphan process (PID {pid}) already terminated"
|
|
494
|
+
else:
|
|
495
|
+
error_msg = f"Could not kill orphan process (PID {pid}): {reason}"
|
|
496
|
+
|
|
497
|
+
project_db.update_run(
|
|
498
|
+
run["id"],
|
|
499
|
+
status="aborted",
|
|
500
|
+
completed_at=datetime.utcnow().isoformat(),
|
|
501
|
+
error_message=error_msg,
|
|
502
|
+
)
|
|
503
|
+
|
|
504
|
+
if success:
|
|
505
|
+
return {
|
|
506
|
+
"message": f"Stopped orphan process for {loop_name}",
|
|
507
|
+
"method": "pid_kill",
|
|
508
|
+
"pid": pid,
|
|
509
|
+
"detail": reason, # "killed" or "already_dead"
|
|
510
|
+
}
|
|
511
|
+
else:
|
|
512
|
+
return {
|
|
513
|
+
"message": f"Could not kill process {pid}, marked as aborted",
|
|
514
|
+
"method": "pid_kill_failed",
|
|
515
|
+
"pid": pid,
|
|
516
|
+
"reason": reason,
|
|
517
|
+
"warning": "Process may not have been our process (PID reuse)" if reason == "not_our_process" else None,
|
|
518
|
+
}
|
|
519
|
+
finally:
|
|
520
|
+
_stopping_loops.discard(key)
|
|
447
521
|
|
|
448
522
|
|
|
449
523
|
@router.post("/{slug}/loops/{loop_name}/pause")
|
|
450
524
|
async def pause_loop(slug: str, loop_name: str):
|
|
451
525
|
"""Pause a running loop."""
|
|
452
|
-
|
|
453
|
-
get_managers(slug)
|
|
526
|
+
manager, project, project_db = get_managers(slug)
|
|
454
527
|
|
|
455
528
|
key = f"{slug}:{loop_name}"
|
|
456
529
|
executor = _running_loops.get(key)
|
|
457
530
|
|
|
458
531
|
if not executor:
|
|
532
|
+
# Check if there's an orphan process
|
|
533
|
+
runs = project_db.list_runs(loop_name=loop_name, status=["running", "paused"])
|
|
534
|
+
if runs:
|
|
535
|
+
raise HTTPException(
|
|
536
|
+
status_code=status.HTTP_409_CONFLICT,
|
|
537
|
+
detail=f"Loop {loop_name} is running as orphan process (server restarted). Use stop to terminate it.",
|
|
538
|
+
)
|
|
459
539
|
raise HTTPException(
|
|
460
540
|
status_code=status.HTTP_404_NOT_FOUND,
|
|
461
541
|
detail=f"Loop {loop_name} is not running",
|
|
@@ -469,13 +549,19 @@ async def pause_loop(slug: str, loop_name: str):
|
|
|
469
549
|
@router.post("/{slug}/loops/{loop_name}/resume")
|
|
470
550
|
async def resume_loop(slug: str, loop_name: str):
|
|
471
551
|
"""Resume a paused loop."""
|
|
472
|
-
|
|
473
|
-
get_managers(slug)
|
|
552
|
+
manager, project, project_db = get_managers(slug)
|
|
474
553
|
|
|
475
554
|
key = f"{slug}:{loop_name}"
|
|
476
555
|
executor = _running_loops.get(key)
|
|
477
556
|
|
|
478
557
|
if not executor:
|
|
558
|
+
# Check if there's an orphan process
|
|
559
|
+
runs = project_db.list_runs(loop_name=loop_name, status=["running", "paused"])
|
|
560
|
+
if runs:
|
|
561
|
+
raise HTTPException(
|
|
562
|
+
status_code=status.HTTP_409_CONFLICT,
|
|
563
|
+
detail=f"Loop {loop_name} is orphaned (server restarted). Use stop to terminate, then start again.",
|
|
564
|
+
)
|
|
479
565
|
raise HTTPException(
|
|
480
566
|
status_code=status.HTTP_404_NOT_FOUND,
|
|
481
567
|
detail=f"Loop {loop_name} is not running",
|
ralphx/api/routes/planning.py
CHANGED
|
@@ -459,7 +459,7 @@ async def stream_planning_response(slug: str, workflow_id: str):
|
|
|
459
459
|
async for event in service.stream_response(
|
|
460
460
|
messages,
|
|
461
461
|
model=model,
|
|
462
|
-
tools=allowed_tools
|
|
462
|
+
tools=allowed_tools,
|
|
463
463
|
timeout=timeout,
|
|
464
464
|
):
|
|
465
465
|
if event.type == AdapterEvent.TEXT:
|
ralphx/api/routes/stream.py
CHANGED
|
@@ -8,7 +8,7 @@ from fastapi import APIRouter, HTTPException, Query, status
|
|
|
8
8
|
from fastapi.responses import StreamingResponse
|
|
9
9
|
|
|
10
10
|
from ralphx.core.project import ProjectManager
|
|
11
|
-
from ralphx.core.session import
|
|
11
|
+
from ralphx.core.session import SessionManager
|
|
12
12
|
from ralphx.models.run import RunStatus
|
|
13
13
|
|
|
14
14
|
router = APIRouter()
|
|
@@ -179,13 +179,18 @@ async def _tail_session(
|
|
|
179
179
|
run_id: Optional[str] = None,
|
|
180
180
|
iteration: Optional[int] = None,
|
|
181
181
|
) -> AsyncGenerator[str, None]:
|
|
182
|
-
"""
|
|
182
|
+
"""Stream session events from DB via polling.
|
|
183
|
+
|
|
184
|
+
Events are persisted to the session_events table by the executor's
|
|
185
|
+
persist_event callback. This function polls that table and yields
|
|
186
|
+
SSE events as they appear — same pattern as planning.py's
|
|
187
|
+
stream_iteration_progress().
|
|
183
188
|
|
|
184
189
|
Args:
|
|
185
190
|
session_manager: Session manager instance.
|
|
186
191
|
session_id: Session UUID.
|
|
187
|
-
project_path: Project directory path.
|
|
188
|
-
project_db: ProjectDatabase for
|
|
192
|
+
project_path: Project directory path (used for optional file metadata).
|
|
193
|
+
project_db: ProjectDatabase for reading events.
|
|
189
194
|
from_beginning: Start from file beginning.
|
|
190
195
|
run_id: Run ID for this session.
|
|
191
196
|
iteration: Iteration number for this session.
|
|
@@ -195,16 +200,12 @@ async def _tail_session(
|
|
|
195
200
|
"""
|
|
196
201
|
from pathlib import Path
|
|
197
202
|
|
|
203
|
+
# Session file is optional metadata — streaming uses DB polling, not file tailing
|
|
198
204
|
session_file = session_manager.find_session_file(
|
|
199
205
|
session_id=session_id,
|
|
200
206
|
project_path=Path(project_path),
|
|
201
207
|
)
|
|
202
|
-
|
|
203
|
-
if not session_file:
|
|
204
|
-
yield await format_sse("error", {
|
|
205
|
-
"message": f"Session file not found: {session_id}"
|
|
206
|
-
})
|
|
207
|
-
return
|
|
208
|
+
# Don't abort if file not found — we stream from DB
|
|
208
209
|
|
|
209
210
|
# Get session info if not provided
|
|
210
211
|
if run_id is None or iteration is None:
|
|
@@ -250,6 +251,16 @@ async def _tail_session(
|
|
|
250
251
|
"message": db_event.get("error_message"),
|
|
251
252
|
**event_meta,
|
|
252
253
|
})
|
|
254
|
+
elif event_type == "thinking":
|
|
255
|
+
yield await format_sse("thinking", {
|
|
256
|
+
"content": db_event.get("content", ""),
|
|
257
|
+
**event_meta,
|
|
258
|
+
})
|
|
259
|
+
elif event_type == "usage":
|
|
260
|
+
yield await format_sse("usage", {
|
|
261
|
+
"data": db_event.get("raw_data"),
|
|
262
|
+
**event_meta,
|
|
263
|
+
})
|
|
253
264
|
elif event_type == "init":
|
|
254
265
|
yield await format_sse("init", {
|
|
255
266
|
"data": db_event.get("raw_data"),
|
|
@@ -261,64 +272,95 @@ async def _tail_session(
|
|
|
261
272
|
|
|
262
273
|
yield await format_sse("session_start", {
|
|
263
274
|
"session_id": session_id,
|
|
264
|
-
"file": str(session_file),
|
|
275
|
+
"file": str(session_file) if session_file else None,
|
|
265
276
|
"history_events": len(existing_events),
|
|
266
277
|
"run_id": run_id,
|
|
267
278
|
"iteration": iteration,
|
|
268
279
|
})
|
|
269
280
|
|
|
270
|
-
#
|
|
271
|
-
#
|
|
272
|
-
tailer = SessionTailer(
|
|
273
|
-
session_path=session_file,
|
|
274
|
-
from_beginning=from_beginning and len(existing_events) == 0,
|
|
275
|
-
)
|
|
276
|
-
|
|
281
|
+
# Poll DB for new events (same pattern as planning.py stream_iteration_progress)
|
|
282
|
+
# This replaces the SessionTailer file-tailing approach to unify streaming
|
|
277
283
|
try:
|
|
278
|
-
|
|
279
|
-
#
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
"
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
284
|
+
while True:
|
|
285
|
+
# Fetch new events since last seen
|
|
286
|
+
new_events = project_db.get_session_events(session_id, after_id=last_db_event_id)
|
|
287
|
+
|
|
288
|
+
for db_event in new_events:
|
|
289
|
+
last_db_event_id = db_event.get("id", 0)
|
|
290
|
+
event_type = db_event.get("event_type", "unknown")
|
|
291
|
+
|
|
292
|
+
if event_type == "text":
|
|
293
|
+
yield await format_sse("text", {
|
|
294
|
+
"content": db_event.get("content", ""),
|
|
295
|
+
**event_meta,
|
|
296
|
+
})
|
|
297
|
+
elif event_type == "tool_call":
|
|
298
|
+
yield await format_sse("tool_call", {
|
|
299
|
+
"name": db_event.get("tool_name"),
|
|
300
|
+
"input": db_event.get("tool_input"),
|
|
301
|
+
**event_meta,
|
|
302
|
+
})
|
|
303
|
+
elif event_type == "tool_result":
|
|
304
|
+
yield await format_sse("tool_result", {
|
|
305
|
+
"name": db_event.get("tool_name"),
|
|
306
|
+
"result": db_event.get("tool_result"),
|
|
307
|
+
**event_meta,
|
|
308
|
+
})
|
|
309
|
+
elif event_type == "error":
|
|
310
|
+
yield await format_sse("error", {
|
|
311
|
+
"message": db_event.get("error_message"),
|
|
312
|
+
**event_meta,
|
|
313
|
+
})
|
|
314
|
+
elif event_type == "thinking":
|
|
315
|
+
yield await format_sse("thinking", {
|
|
316
|
+
"content": db_event.get("content", ""),
|
|
317
|
+
**event_meta,
|
|
318
|
+
})
|
|
319
|
+
elif event_type == "usage":
|
|
320
|
+
yield await format_sse("usage", {
|
|
321
|
+
"data": db_event.get("raw_data"),
|
|
322
|
+
**event_meta,
|
|
323
|
+
})
|
|
324
|
+
elif event_type == "complete":
|
|
325
|
+
yield await format_sse("complete", event_meta)
|
|
326
|
+
return # Session complete
|
|
327
|
+
elif event_type == "init":
|
|
328
|
+
yield await format_sse("init", {
|
|
329
|
+
"data": db_event.get("raw_data"),
|
|
330
|
+
**event_meta,
|
|
331
|
+
})
|
|
332
|
+
|
|
333
|
+
# Check if session is done (status updated by executor)
|
|
334
|
+
session_info = project_db.get_session(session_id)
|
|
335
|
+
if session_info and session_info.get("status") in ("completed", "error"):
|
|
336
|
+
# Drain any remaining events
|
|
337
|
+
final_events = project_db.get_session_events(session_id, after_id=last_db_event_id)
|
|
338
|
+
for db_event in final_events:
|
|
339
|
+
last_db_event_id = db_event.get("id", 0)
|
|
340
|
+
event_type = db_event.get("event_type", "unknown")
|
|
341
|
+
if event_type == "text":
|
|
342
|
+
yield await format_sse("text", {"content": db_event.get("content", ""), **event_meta})
|
|
343
|
+
elif event_type == "tool_call":
|
|
344
|
+
yield await format_sse("tool_call", {"name": db_event.get("tool_name"), "input": db_event.get("tool_input"), **event_meta})
|
|
345
|
+
elif event_type == "tool_result":
|
|
346
|
+
yield await format_sse("tool_result", {"name": db_event.get("tool_name"), "result": db_event.get("tool_result"), **event_meta})
|
|
347
|
+
elif event_type == "error":
|
|
348
|
+
yield await format_sse("error", {"message": db_event.get("error_message"), **event_meta})
|
|
349
|
+
elif event_type == "complete":
|
|
350
|
+
yield await format_sse("complete", event_meta)
|
|
351
|
+
elif event_type == "thinking":
|
|
352
|
+
yield await format_sse("thinking", {"content": db_event.get("content", ""), **event_meta})
|
|
353
|
+
elif event_type == "usage":
|
|
354
|
+
yield await format_sse("usage", {"data": db_event.get("raw_data"), **event_meta})
|
|
355
|
+
elif event_type == "init":
|
|
356
|
+
yield await format_sse("init", {"data": db_event.get("raw_data"), **event_meta})
|
|
312
357
|
break
|
|
313
358
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
**event_meta,
|
|
318
|
-
})
|
|
359
|
+
# Heartbeat + poll interval (same as planning.py)
|
|
360
|
+
yield await format_sse("heartbeat", {})
|
|
361
|
+
await asyncio.sleep(0.5)
|
|
319
362
|
|
|
320
363
|
except asyncio.CancelledError:
|
|
321
|
-
tailer.stop()
|
|
322
364
|
yield await format_sse("disconnected", {})
|
|
323
365
|
|
|
324
366
|
|
|
@@ -452,6 +494,7 @@ async def list_sessions(
|
|
|
452
494
|
"status": s.status,
|
|
453
495
|
"started_at": s.started_at.isoformat() if s.started_at else None,
|
|
454
496
|
"duration_seconds": s.duration_seconds,
|
|
497
|
+
"account_email": s.account_email,
|
|
455
498
|
}
|
|
456
499
|
for s in sessions
|
|
457
500
|
]
|
|
@@ -484,6 +527,7 @@ async def get_session(
|
|
|
484
527
|
"started_at": session.started_at.isoformat() if session.started_at else None,
|
|
485
528
|
"duration_seconds": session.duration_seconds,
|
|
486
529
|
"items_added": session.items_added,
|
|
530
|
+
"account_email": session.account_email,
|
|
487
531
|
}
|
|
488
532
|
|
|
489
533
|
|
|
@@ -527,7 +571,7 @@ async def get_session_events(
|
|
|
527
571
|
async def get_grouped_events(
|
|
528
572
|
slug: str,
|
|
529
573
|
loop_name: str,
|
|
530
|
-
limit_runs: int = Query(
|
|
574
|
+
limit_runs: int = Query(20, ge=1, le=50, description="Max runs to return"),
|
|
531
575
|
limit_sessions: int = Query(20, ge=1, le=100, description="Max sessions per run"),
|
|
532
576
|
limit_events: int = Query(200, ge=1, le=1000, description="Max events per session"),
|
|
533
577
|
):
|
|
@@ -579,6 +623,7 @@ async def get_grouped_events(
|
|
|
579
623
|
"mode": session.mode,
|
|
580
624
|
"status": session.status,
|
|
581
625
|
"is_live": is_live,
|
|
626
|
+
"account_email": session.account_email,
|
|
582
627
|
"events": events,
|
|
583
628
|
"events_truncated": len(events) >= limit_events,
|
|
584
629
|
}
|
ralphx/api/routes/templates.py
CHANGED
ralphx/api/routes/workflows.py
CHANGED
|
@@ -28,7 +28,7 @@ PROCESSING_TYPES = {
|
|
|
28
28
|
"config": {
|
|
29
29
|
"loopType": "generator",
|
|
30
30
|
"template": "extractgen_requirements",
|
|
31
|
-
"allowedTools": ["
|
|
31
|
+
"allowedTools": ["Read", "Glob", "Grep"],
|
|
32
32
|
"model": "opus",
|
|
33
33
|
"timeout": 600,
|
|
34
34
|
"max_iterations": 100,
|
|
@@ -41,7 +41,7 @@ PROCESSING_TYPES = {
|
|
|
41
41
|
"config": {
|
|
42
42
|
"loopType": "generator",
|
|
43
43
|
"template": "webgen_requirements",
|
|
44
|
-
"allowedTools": ["WebSearch", "WebFetch"],
|
|
44
|
+
"allowedTools": ["Read", "Glob", "Grep", "WebSearch", "WebFetch"],
|
|
45
45
|
"model": "opus",
|
|
46
46
|
"timeout": 900,
|
|
47
47
|
"max_iterations": 15,
|
ralphx/core/checkpoint.py
CHANGED
|
@@ -7,9 +7,12 @@ Implements:
|
|
|
7
7
|
- Recovery flow for resuming interrupted runs
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
import asyncio
|
|
10
11
|
import fcntl
|
|
11
12
|
import json
|
|
12
13
|
import os
|
|
14
|
+
import signal
|
|
15
|
+
import subprocess
|
|
13
16
|
import sys
|
|
14
17
|
from dataclasses import dataclass, field
|
|
15
18
|
from datetime import datetime
|
|
@@ -197,6 +200,121 @@ def is_pid_running(pid: int) -> bool:
|
|
|
197
200
|
return False
|
|
198
201
|
|
|
199
202
|
|
|
203
|
+
def is_our_claude_process(pid: int) -> bool:
    """Verify PID is actually our Claude process, not a reused PID.

    This guards against PID reuse, where we might accidentally kill an
    unrelated process that was assigned the same PID after our Claude
    process terminated.

    The process description (image name on Windows, full command line on
    macOS/Linux) is lower-cased and searched for the substrings "claude",
    "python" or "ralphx". The same marker set is used on every platform.

    Args:
        pid: Process ID to inspect.

    Returns:
        True if the process description contains one of the markers.
        False if:
        - pid is not positive
        - the process doesn't exist
        - the description can't be read (permissions, missing tool, timeout)
        - the process is not a Claude CLI or Python/RalphX process

    Note: There is still a small TOCTOU (time-of-check-to-time-of-use) race
    between this check and the actual kill. This is an accepted risk that
    is mitigated by:
    1. The check significantly reduces the window vs. no check at all
    2. We only use PIDs from our own database, not user input
    3. The target must match expected process names
    """
    if pid <= 0:
        return False

    # One marker set for all platforms so the branches cannot drift apart.
    markers = ("claude", "python", "ralphx")

    if sys.platform == "win32":
        # Windows: tasklist reports the image name (e.g. "python.exe",
        # "ralphx.exe") for the given PID.
        command = ["tasklist", "/FI", f"PID eq {pid}", "/FO", "CSV", "/NH"]
    elif sys.platform == "darwin":
        # macOS: Use ps command (no /proc filesystem)
        command = ["ps", "-p", str(pid), "-o", "command="]
    else:
        # Linux: Check /proc/{pid}/cmdline (most reliable)
        try:
            with open(f"/proc/{pid}/cmdline", "rb") as f:
                cmdline = f.read().decode("utf-8", errors="replace").lower()
        except OSError:
            return False
        # cmdline uses null bytes as separators; substring matching is
        # unaffected because no marker contains one.
        return any(marker in cmdline for marker in markers)

    try:
        result = subprocess.run(
            command, capture_output=True, text=True, timeout=5
        )
    except (OSError, subprocess.SubprocessError, ValueError):
        # Tool missing, timed out, or output undecodable: treat as "not ours"
        # so the caller never kills a process it could not identify.
        return False

    output = result.stdout.lower()
    return any(marker in output for marker in markers)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# Seconds between liveness checks while waiting for a process to exit.
_ORPHAN_POLL_INTERVAL = 0.1
# Liveness checks performed after a forced kill before giving up (~1 second).
_ORPHAN_KILL_CONFIRM_POLLS = 10


async def _wait_for_pid_exit(pid: int, polls: int) -> bool:
    """Sleep-and-check up to *polls* times; return True once *pid* is gone."""
    for _ in range(polls):
        await asyncio.sleep(_ORPHAN_POLL_INTERVAL)
        if not is_pid_running(pid):
            return True
    return False


async def kill_orphan_process(pid: int, timeout: float = 5.0) -> tuple[bool, str]:
    """Kill an orphan Claude/RalphX process by PID.

    Args:
        pid: Process ID recorded for the orphaned run.
        timeout: Seconds to wait after SIGTERM before escalating to SIGKILL
            (Unix only).

    Returns tuple of (success, reason):
    - (True, "killed") - Process was terminated by us
    - (True, "already_dead") - Process was already dead
    - (False, "not_our_process") - PID exists but isn't our process
    - (False, "permission_denied") - Can't kill (permissions)
    - (False, "unknown_error") - Other failure

    Cross-platform notes:
    - Linux/macOS: SIGTERM for graceful, SIGKILL for force
    - Windows: os.kill() with any signal calls TerminateProcess (immediate)
    """
    # Check if process is already dead
    if not is_pid_running(pid):
        return (True, "already_dead")

    # Validate this is our process
    if not is_our_claude_process(pid):
        return (False, "not_our_process")

    try:
        if sys.platform == "win32":
            # Windows: TerminateProcess is immediate, no graceful option
            os.kill(pid, signal.SIGTERM)  # Actually calls TerminateProcess
        else:
            # Unix: Send SIGTERM for graceful shutdown
            os.kill(pid, signal.SIGTERM)

            # Wait for process to die
            if await _wait_for_pid_exit(pid, int(timeout * 10)):
                return (True, "killed")

            # Process didn't die, force kill
            os.kill(pid, signal.SIGKILL)

        # Teardown after a forced kill is not instantaneous; poll for a short
        # grace period rather than checking once, so a process that is still
        # exiting is not misreported as "unknown_error".
        if await _wait_for_pid_exit(pid, _ORPHAN_KILL_CONFIRM_POLLS):
            return (True, "killed")
        return (False, "unknown_error")

    except ProcessLookupError:
        # Process already dead - success
        return (True, "already_dead")
    except PermissionError:
        # Can't kill - likely not our process
        return (False, "permission_denied")
    except OSError:
        return (False, "unknown_error")
|
|
316
|
+
|
|
317
|
+
|
|
200
318
|
class ProjectLock:
|
|
201
319
|
"""Atomic file lock for a project.
|
|
202
320
|
|