@pushpalsdev/cli 1.1.35 → 1.1.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -116,6 +116,7 @@ _WEB_REVIEW_NO_EDIT_WATCHDOG_S = 240
|
|
|
116
116
|
_BACKGROUND_NO_EDIT_WATCHDOG_S = 120
|
|
117
117
|
_NO_EDIT_RECOVERY_WATCHDOG_S = 90
|
|
118
118
|
_DEFAULT_NO_EDIT_RECHECK_S = 120
|
|
119
|
+
_DEFAULT_NO_EDIT_COMMAND_GRACE_S = 240
|
|
119
120
|
_DEFAULT_STARTUP_STALL_WATCHDOG_S = 210
|
|
120
121
|
_RECOVERY_STARTUP_STALL_WATCHDOG_S = 150
|
|
121
122
|
_DEFAULT_ROLLOUT_WATCHDOG_S = 300
|
|
@@ -757,6 +758,27 @@ def _resolve_no_edit_recheck_seconds(communicate_timeout_s: Optional[int]) -> in
|
|
|
757
758
|
return max(1, min(_DEFAULT_NO_EDIT_RECHECK_S, upper))
|
|
758
759
|
|
|
759
760
|
|
|
761
|
+
def _resolve_no_edit_command_grace_seconds(communicate_timeout_s: Optional[int]) -> Optional[int]:
|
|
762
|
+
if not communicate_timeout_s:
|
|
763
|
+
return None
|
|
764
|
+
|
|
765
|
+
raw = os.environ.get("WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S", "").strip()
|
|
766
|
+
if raw:
|
|
767
|
+
if raw == "0":
|
|
768
|
+
return None
|
|
769
|
+
parsed = _to_positive_int(raw)
|
|
770
|
+
if parsed is None:
|
|
771
|
+
log.info(
|
|
772
|
+
"Invalid WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S="
|
|
773
|
+
f"{raw!r}; using default command-progress grace."
|
|
774
|
+
)
|
|
775
|
+
else:
|
|
776
|
+
return max(1, min(parsed, max(1, communicate_timeout_s - 1)))
|
|
777
|
+
|
|
778
|
+
upper = max(1, communicate_timeout_s - 1)
|
|
779
|
+
return max(1, min(_DEFAULT_NO_EDIT_COMMAND_GRACE_S, upper))
|
|
780
|
+
|
|
781
|
+
|
|
760
782
|
def _resolve_startup_stall_watchdog_seconds(
|
|
761
783
|
communicate_timeout_s: Optional[int],
|
|
762
784
|
recovery_attempt: int = 0,
|
|
@@ -1339,10 +1361,94 @@ def _empty_codex_trace() -> Dict[str, Any]:
|
|
|
1339
1361
|
"prompt_tokens": 0,
|
|
1340
1362
|
"completion_tokens": 0,
|
|
1341
1363
|
"total_tokens": 0,
|
|
1364
|
+
"active_command_ids": [],
|
|
1365
|
+
"command_event_count": 0,
|
|
1366
|
+
"last_command_activity_at": None,
|
|
1367
|
+
"last_command_summary": "",
|
|
1342
1368
|
}
|
|
1343
1369
|
|
|
1344
1370
|
|
|
1345
|
-
def
|
|
1371
|
+
def _looks_like_codex_command_item(value: Any) -> bool:
|
|
1372
|
+
if not isinstance(value, dict):
|
|
1373
|
+
return False
|
|
1374
|
+
type_text = " ".join(
|
|
1375
|
+
str(value.get(key) or "")
|
|
1376
|
+
for key in ("type", "item_type", "kind", "name", "tool_name")
|
|
1377
|
+
).lower()
|
|
1378
|
+
if any(marker in type_text for marker in ("command_execution", "exec_command", "shell_command")):
|
|
1379
|
+
return True
|
|
1380
|
+
return any(key in value for key in ("command", "cmd", "exit_code", "aggregated_output"))
|
|
1381
|
+
|
|
1382
|
+
|
|
1383
|
+
def _record_codex_command_activity(
    parsed: Dict[str, Any],
    event_type: str,
    trace: Dict[str, Any],
    now: float,
) -> None:
    """Fold one parsed Codex JSON event into the trace's command bookkeeping.

    The command payload is ``parsed["item"]`` when that looks like a command
    item, otherwise ``parsed`` itself; events where neither qualifies are
    ignored. For qualifying events this updates, in ``trace``:

    * ``active_command_ids`` - the command id is removed on a terminal
      event/status and appended (once) on a start/update event or running status;
    * ``command_event_count`` - incremented by one;
    * ``last_command_activity_at`` - set to ``float(now)``;
    * ``last_command_summary`` - the truncated command text, or ``event_type``
      when no command text is available.
    """
    nested = parsed.get("item")
    command_source: Any = nested if _looks_like_codex_command_item(nested) else parsed
    if not _looks_like_codex_command_item(command_source):
        return

    # Fall back to an empty mapping so every lookup below degrades to its default.
    fields: Dict[str, Any] = command_source if isinstance(command_source, dict) else {}

    command_text = ""
    for text_key in ("command", "cmd", "name"):
        candidate = fields.get(text_key)
        if isinstance(candidate, str) and candidate.strip():
            command_text = _truncate_inline(candidate.strip(), 160)
            break

    # Prefer an explicit identifier; otherwise key the command by its text.
    identity = (
        fields.get("id")
        or fields.get("call_id")
        or fields.get("item_id")
        or command_text
        or "command"
    )
    command_id = str(identity).strip() or "command"

    active = trace.setdefault("active_command_ids", [])
    if not isinstance(active, list):
        # Repair a corrupted slot so the list operations below are safe.
        active = trace["active_command_ids"] = []

    status_text = " ".join(
        [str(fields.get(status_key) or "") for status_key in ("status", "state", "outcome")]
    ).lower()
    event_lower = event_type.lower()

    terminal_status_markers = ("completed", "failed", "cancelled", "canceled", "exited")
    running_status_markers = ("running", "in_progress", "started")

    completed = any(word in event_lower for word in ("completed", "failed", "error")) or any(
        marker in status_text for marker in terminal_status_markers
    )

    if completed:
        trace["active_command_ids"] = [entry for entry in active if str(entry) != command_id]
    else:
        started = any(word in event_lower for word in ("started", "updated")) or any(
            marker in status_text for marker in running_status_markers
        )
        if started and command_id not in active:
            active.append(command_id)

    trace["command_event_count"] = to_int(trace.get("command_event_count"), 0) + 1
    trace["last_command_activity_at"] = float(now)
    trace["last_command_summary"] = command_text or event_type
|
|
1444
|
+
|
|
1445
|
+
|
|
1446
|
+
def _record_live_codex_stdout_line(
|
|
1447
|
+
line: str,
|
|
1448
|
+
use_json: bool,
|
|
1449
|
+
trace: Dict[str, Any],
|
|
1450
|
+
now: Optional[float] = None,
|
|
1451
|
+
) -> None:
|
|
1346
1452
|
stripped = line.strip()
|
|
1347
1453
|
if not stripped:
|
|
1348
1454
|
return
|
|
@@ -1369,6 +1475,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
|
|
|
1369
1475
|
return
|
|
1370
1476
|
|
|
1371
1477
|
if isinstance(parsed, dict):
|
|
1478
|
+
observed_at = float(now if now is not None else time.monotonic())
|
|
1372
1479
|
usage = _extract_usage_counts(parsed)
|
|
1373
1480
|
if usage is not None:
|
|
1374
1481
|
trace["prompt_tokens"] = max(
|
|
@@ -1385,6 +1492,7 @@ def _record_live_codex_stdout_line(line: str, use_json: bool, trace: Dict[str, A
|
|
|
1385
1492
|
.strip()
|
|
1386
1493
|
or "event"
|
|
1387
1494
|
)
|
|
1495
|
+
_record_codex_command_activity(parsed, event_type, trace, observed_at)
|
|
1388
1496
|
event_type_counts[event_type] = to_int(event_type_counts.get(event_type), 0) + 1
|
|
1389
1497
|
summary = _summarize_json_event(parsed)
|
|
1390
1498
|
# Reasoning can arrive under generic event types (for example item.updated).
|
|
@@ -1449,10 +1557,13 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
|
|
|
1449
1557
|
prompt_tokens = to_int(trace.get("prompt_tokens"), 0)
|
|
1450
1558
|
completion_tokens = to_int(trace.get("completion_tokens"), 0)
|
|
1451
1559
|
total_tokens = to_int(trace.get("total_tokens"), 0)
|
|
1560
|
+
command_event_count = to_int(trace.get("command_event_count"), 0)
|
|
1452
1561
|
if reasoning_events > 0:
|
|
1453
1562
|
log.info(f"[codex] Reasoning-like event(s): {reasoning_events}")
|
|
1454
1563
|
elif use_json and valid_json > 0:
|
|
1455
1564
|
log.info("[codex] No reasoning-like events observed in this run.")
|
|
1565
|
+
if command_event_count > 0:
|
|
1566
|
+
log.info(f"[codex] Command execution event(s): {command_event_count}")
|
|
1456
1567
|
if total_tokens > 0:
|
|
1457
1568
|
log.info(
|
|
1458
1569
|
f"[codex] Usage observed: prompt={prompt_tokens} completion={completion_tokens} total={total_tokens}"
|
|
@@ -1473,6 +1584,7 @@ def _finalize_codex_stdout_trace(trace: Dict[str, Any], use_json: bool) -> Dict[
|
|
|
1473
1584
|
"prompt_tokens": prompt_tokens,
|
|
1474
1585
|
"completion_tokens": completion_tokens,
|
|
1475
1586
|
"total_tokens": total_tokens,
|
|
1587
|
+
"command_event_count": command_event_count,
|
|
1476
1588
|
}
|
|
1477
1589
|
|
|
1478
1590
|
|
|
@@ -2299,8 +2411,14 @@ def _run_codex_task(
|
|
|
2299
2411
|
if not line:
|
|
2300
2412
|
continue
|
|
2301
2413
|
with trace_lock:
|
|
2302
|
-
|
|
2303
|
-
|
|
2414
|
+
observed_at = time.monotonic()
|
|
2415
|
+
last_activity_at["ts"] = observed_at
|
|
2416
|
+
_record_live_codex_stdout_line(
|
|
2417
|
+
line,
|
|
2418
|
+
use_json,
|
|
2419
|
+
stdout_trace_state,
|
|
2420
|
+
observed_at,
|
|
2421
|
+
)
|
|
2304
2422
|
except Exception:
|
|
2305
2423
|
pass
|
|
2306
2424
|
finally:
|
|
@@ -2377,6 +2495,7 @@ def _run_codex_task(
|
|
|
2377
2495
|
else None
|
|
2378
2496
|
)
|
|
2379
2497
|
no_edit_recheck_s = _resolve_no_edit_recheck_seconds(communicate_timeout_s)
|
|
2498
|
+
no_edit_command_grace_s = _resolve_no_edit_command_grace_seconds(communicate_timeout_s)
|
|
2380
2499
|
startup_stall_watchdog_s = _resolve_startup_stall_watchdog_seconds(
|
|
2381
2500
|
communicate_timeout_s,
|
|
2382
2501
|
recovery_attempt=startup_stall_recovery_attempt,
|
|
@@ -2400,6 +2519,11 @@ def _run_codex_task(
|
|
|
2400
2519
|
if no_edit_watchdog_s is not None
|
|
2401
2520
|
else None
|
|
2402
2521
|
)
|
|
2522
|
+
no_edit_command_grace_cap_deadline = (
|
|
2523
|
+
started_at + float(no_edit_watchdog_s + no_edit_command_grace_s)
|
|
2524
|
+
if no_edit_watchdog_s is not None and no_edit_command_grace_s is not None
|
|
2525
|
+
else None
|
|
2526
|
+
)
|
|
2403
2527
|
rollout_deadline = (
|
|
2404
2528
|
started_at + float(rollout_watchdog_s)
|
|
2405
2529
|
if rollout_watchdog_s is not None
|
|
@@ -2457,6 +2581,49 @@ def _run_codex_task(
|
|
|
2457
2581
|
"before startup-stall recovery."
|
|
2458
2582
|
)
|
|
2459
2583
|
continue
|
|
2584
|
+
command_event_count = to_int(live_trace.get("command_event_count"), 0)
|
|
2585
|
+
active_commands_raw = live_trace.get("active_command_ids")
|
|
2586
|
+
active_command_count = (
|
|
2587
|
+
len(active_commands_raw)
|
|
2588
|
+
if isinstance(active_commands_raw, list)
|
|
2589
|
+
else 0
|
|
2590
|
+
)
|
|
2591
|
+
last_command_activity_at = 0.0
|
|
2592
|
+
try:
|
|
2593
|
+
last_command_activity_at = float(
|
|
2594
|
+
live_trace.get("last_command_activity_at") or 0.0
|
|
2595
|
+
)
|
|
2596
|
+
except Exception:
|
|
2597
|
+
last_command_activity_at = 0.0
|
|
2598
|
+
if command_event_count > 0 and no_edit_command_grace_s is not None:
|
|
2599
|
+
command_grace_deadline = 0.0
|
|
2600
|
+
if active_command_count > 0:
|
|
2601
|
+
# Do not kill while Codex is actively running a tool command; poll
|
|
2602
|
+
# again soon, but keep the total grace bounded by the hard cap below.
|
|
2603
|
+
command_grace_deadline = now + min(60.0, float(no_edit_command_grace_s))
|
|
2604
|
+
elif last_command_activity_at > 0:
|
|
2605
|
+
command_grace_deadline = last_command_activity_at + float(
|
|
2606
|
+
no_edit_command_grace_s
|
|
2607
|
+
)
|
|
2608
|
+
if no_edit_command_grace_cap_deadline is not None:
|
|
2609
|
+
command_grace_deadline = min(
|
|
2610
|
+
command_grace_deadline,
|
|
2611
|
+
no_edit_command_grace_cap_deadline,
|
|
2612
|
+
)
|
|
2613
|
+
if command_grace_deadline > now:
|
|
2614
|
+
no_edit_deadline = command_grace_deadline
|
|
2615
|
+
remaining_s = int(max(1.0, command_grace_deadline - now))
|
|
2616
|
+
command_detail = (
|
|
2617
|
+
f"{active_command_count} active command(s)"
|
|
2618
|
+
if active_command_count > 0
|
|
2619
|
+
else "recent command completion"
|
|
2620
|
+
)
|
|
2621
|
+
log.info(
|
|
2622
|
+
"No-edit watchdog observed Codex tool progress "
|
|
2623
|
+
f"({command_detail}); allowing {remaining_s}s for a "
|
|
2624
|
+
"publishable patch before recovery."
|
|
2625
|
+
)
|
|
2626
|
+
continue
|
|
2460
2627
|
no_edit_artifact_only_paths = _describe_non_publishable_paths(
|
|
2461
2628
|
changed_paths,
|
|
2462
2629
|
baseline_snapshot,
|
|
@@ -1347,6 +1347,90 @@ class OpenAICodexRuntimeConfigTests(unittest.TestCase):
|
|
|
1347
1347
|
self.assertIn("Patched immediately after no-edit recovery", str(result.get("stdout") or ""))
|
|
1348
1348
|
self.assertIn("src/", str(result.get("stdout") or ""))
|
|
1349
1349
|
|
|
1350
|
+
def test_run_codex_task_no_edit_watchdog_allows_command_backed_discovery(self) -> None:
    """Command activity should buy the run extra time before no-edit recovery.

    A stub Codex binary reports a ``command_execution`` item starting, sleeps
    1.4s, reports it completed, sleeps another 1.6s, and only then writes
    ``src/command-grace.txt``. The no-edit watchdog override is ``"1"`` and the
    command grace override is ``"5"`` (presumably seconds), so the patch lands
    after the plain watchdog value has elapsed. The run is expected to succeed
    with exit code 0 and to mention both the final message and ``src/`` in stdout.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-command-grace-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# command grace repo\n", encoding="utf-8")

        # Seed a git repository with a single commit so the run starts from a clean baseline.
        for git_args in (
            ["init"],
            ["config", "user.name", "PushPals Test"],
            ["config", "user.email", "pushpals-tests@example.com"],
            ["add", "README.md"],
            ["commit", "-m", "chore: seed command grace repo"],
        ):
            subprocess.run(
                ["git", *git_args],
                cwd=repo,
                check=True,
                capture_output=True,
                text=True,
            )

        # Fake Codex executable. The nested lines carry explicit 4/8-space
        # indentation so the generated script is valid Python.
        stub_path = Path(temp_dir) / "fake_codex_no_edit_command_grace.py"
        stub_lines = [
            "from pathlib import Path",
            "import json",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "sys.stdin.read()",
            "print(json.dumps({'type': 'thread.started'}), flush=True)",
            "print(json.dumps({'type': 'turn.started'}), flush=True)",
            "print(json.dumps({'type': 'item.started', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'in_progress'}}), flush=True)",
            "time.sleep(1.4)",
            "print(json.dumps({'type': 'item.completed', 'item': {'id': 'cmd-read-target', 'type': 'command_execution', 'command': 'sed -n 1,120p README.md', 'status': 'completed', 'exit_code': 0, 'aggregated_output': '# command grace repo'}}), flush=True)",
            "time.sleep(1.6)",
            "Path('src').mkdir(exist_ok=True)",
            "Path('src/command-grace.txt').write_text('patched after command-backed discovery\\n', encoding='utf-8')",
            "if last_message_path:",
            "    Path(last_message_path).write_text('Patched after command-backed discovery.', encoding='utf-8')",
            "print(json.dumps({'type': 'item.completed', 'item': {'type': 'message', 'text': 'Patched after command-backed discovery.'}}), flush=True)",
        ]
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-no-edit-command-grace-test-key",
            "WORKERPALS_OPENAI_CODEX_JSON": "true",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "20",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_WATCHDOG_S": "1",
            "WORKERPALS_OPENAI_CODEX_NO_EDIT_COMMAND_GRACE_S": "5",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Add one focused contract assertion after inspecting the hinted test.",
                [],
            )

    stdout_text = str(result.get("stdout") or "")
    self.assertTrue(result.get("ok"), result)
    self.assertEqual(result.get("exitCode"), 0)
    self.assertIn("Patched after command-backed discovery", stdout_text)
    self.assertIn("src/", stdout_text)
|
|
1433
|
+
|
|
1350
1434
|
def test_run_codex_task_recovery_attempt_is_still_guarded_by_no_edit_watchdog(self) -> None:
|
|
1351
1435
|
with tempfile.TemporaryDirectory(prefix="pushpals-codex-no-edit-watchdog-fail-") as temp_dir:
|
|
1352
1436
|
repo = Path(temp_dir) / "repo"
|