jleechanorg-pr-automation 0.1.1__py3-none-any.whl → 0.2.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. jleechanorg_pr_automation/STORAGE_STATE_TESTING_PROTOCOL.md +326 -0
  2. jleechanorg_pr_automation/__init__.py +64 -9
  3. jleechanorg_pr_automation/automation_safety_manager.py +306 -95
  4. jleechanorg_pr_automation/automation_safety_wrapper.py +13 -19
  5. jleechanorg_pr_automation/automation_utils.py +87 -65
  6. jleechanorg_pr_automation/check_codex_comment.py +7 -1
  7. jleechanorg_pr_automation/codex_branch_updater.py +21 -9
  8. jleechanorg_pr_automation/codex_config.py +70 -3
  9. jleechanorg_pr_automation/jleechanorg_pr_monitor.py +1954 -234
  10. jleechanorg_pr_automation/logging_utils.py +86 -0
  11. jleechanorg_pr_automation/openai_automation/__init__.py +3 -0
  12. jleechanorg_pr_automation/openai_automation/codex_github_mentions.py +1111 -0
  13. jleechanorg_pr_automation/openai_automation/debug_page_content.py +88 -0
  14. jleechanorg_pr_automation/openai_automation/oracle_cli.py +364 -0
  15. jleechanorg_pr_automation/openai_automation/test_auth_restoration.py +244 -0
  16. jleechanorg_pr_automation/openai_automation/test_codex_comprehensive.py +355 -0
  17. jleechanorg_pr_automation/openai_automation/test_codex_integration.py +254 -0
  18. jleechanorg_pr_automation/orchestrated_pr_runner.py +516 -0
  19. jleechanorg_pr_automation/tests/__init__.py +0 -0
  20. jleechanorg_pr_automation/tests/test_actionable_counting_matrix.py +84 -86
  21. jleechanorg_pr_automation/tests/test_attempt_limit_logic.py +124 -0
  22. jleechanorg_pr_automation/tests/test_automation_marker_functions.py +175 -0
  23. jleechanorg_pr_automation/tests/test_automation_over_running_reproduction.py +9 -11
  24. jleechanorg_pr_automation/tests/test_automation_safety_limits.py +91 -79
  25. jleechanorg_pr_automation/tests/test_automation_safety_manager_comprehensive.py +53 -53
  26. jleechanorg_pr_automation/tests/test_codex_actor_matching.py +1 -1
  27. jleechanorg_pr_automation/tests/test_fixpr_prompt.py +54 -0
  28. jleechanorg_pr_automation/tests/test_fixpr_return_value.py +140 -0
  29. jleechanorg_pr_automation/tests/test_graphql_error_handling.py +26 -26
  30. jleechanorg_pr_automation/tests/test_model_parameter.py +317 -0
  31. jleechanorg_pr_automation/tests/test_orchestrated_pr_runner.py +697 -0
  32. jleechanorg_pr_automation/tests/test_packaging_integration.py +127 -0
  33. jleechanorg_pr_automation/tests/test_pr_filtering_matrix.py +246 -193
  34. jleechanorg_pr_automation/tests/test_pr_monitor_eligibility.py +354 -0
  35. jleechanorg_pr_automation/tests/test_pr_targeting.py +102 -7
  36. jleechanorg_pr_automation/tests/test_version_consistency.py +51 -0
  37. jleechanorg_pr_automation/tests/test_workflow_specific_limits.py +202 -0
  38. jleechanorg_pr_automation/tests/test_workspace_dispatch_missing_dir.py +119 -0
  39. jleechanorg_pr_automation/utils.py +81 -56
  40. jleechanorg_pr_automation-0.2.45.dist-info/METADATA +864 -0
  41. jleechanorg_pr_automation-0.2.45.dist-info/RECORD +45 -0
  42. jleechanorg_pr_automation-0.1.1.dist-info/METADATA +0 -222
  43. jleechanorg_pr_automation-0.1.1.dist-info/RECORD +0 -23
  44. {jleechanorg_pr_automation-0.1.1.dist-info → jleechanorg_pr_automation-0.2.45.dist-info}/WHEEL +0 -0
  45. {jleechanorg_pr_automation-0.1.1.dist-info → jleechanorg_pr_automation-0.2.45.dist-info}/entry_points.txt +0 -0
  46. {jleechanorg_pr_automation-0.1.1.dist-info → jleechanorg_pr_automation-0.2.45.dist-info}/top_level.txt +0 -0
@@ -7,45 +7,156 @@ posting configurable automation comments with safety limits integration.
7
7
  """
8
8
 
9
9
  import argparse
10
- import os
11
- import sys
12
10
  import json
13
- import subprocess
14
- import logging
11
+ import os
15
12
  import re
13
+ import shlex
14
+ import shutil
15
+ import subprocess
16
+ import sys
17
+ import time
16
18
  import traceback
19
+ import urllib.request
17
20
  from collections import Counter
18
- from pathlib import Path
19
21
  from datetime import datetime, timedelta
20
- from typing import List, Dict, Optional, Tuple
22
+ from pathlib import Path
23
+ from typing import Dict, List, Optional, Tuple
24
+
25
+ ROOT_DIR = Path(__file__).resolve().parents[2]
26
+ if str(ROOT_DIR) not in sys.path:
27
+ sys.path.insert(0, str(ROOT_DIR))
28
+
29
+ from orchestration.task_dispatcher import CLI_PROFILES, TaskDispatcher
30
+
21
31
  from .automation_safety_manager import AutomationSafetyManager
22
- from .utils import setup_logging, json_manager
23
32
  from .automation_utils import AutomationUtils
24
-
25
33
  from .codex_config import (
26
34
  CODEX_COMMIT_MARKER_PREFIX as SHARED_MARKER_PREFIX,
35
+ )
36
+ from .codex_config import (
27
37
  CODEX_COMMIT_MARKER_SUFFIX as SHARED_MARKER_SUFFIX,
38
+ )
39
+ from .codex_config import (
40
+ FIX_COMMENT_MARKER_PREFIX as SHARED_FIX_COMMENT_PREFIX,
41
+ )
42
+ from .codex_config import (
43
+ FIX_COMMENT_MARKER_SUFFIX as SHARED_FIX_COMMENT_SUFFIX,
44
+ )
45
+ from .codex_config import (
46
+ FIX_COMMENT_RUN_MARKER_PREFIX as SHARED_FIX_COMMENT_RUN_PREFIX,
47
+ )
48
+ from .codex_config import (
49
+ FIX_COMMENT_RUN_MARKER_SUFFIX as SHARED_FIX_COMMENT_RUN_SUFFIX,
50
+ )
51
+ from .codex_config import (
52
+ FIXPR_MARKER_PREFIX as SHARED_FIXPR_PREFIX,
53
+ )
54
+ from .codex_config import (
55
+ FIXPR_MARKER_SUFFIX as SHARED_FIXPR_SUFFIX,
56
+ )
57
+ from .codex_config import (
58
+ build_automation_marker,
28
59
  build_comment_intro,
29
60
  )
61
+ from .orchestrated_pr_runner import (
62
+ chdir,
63
+ dispatch_agent_for_pr,
64
+ dispatch_agent_for_pr_with_task,
65
+ ensure_base_clone,
66
+ has_failing_checks,
67
+ run_fixpr_batch,
68
+ )
69
+ from .utils import json_manager, setup_logging
70
+
71
+
72
def _parse_fixpr_agent_chain(value: str) -> str:
    """Parse comma-separated CLI chain for --fixpr-agent (e.g., 'gemini,codex').

    Each entry is lower-cased, stripped, and validated against CLI_PROFILES.
    Duplicates are removed (first occurrence wins) and the surviving names are
    re-joined with commas.

    Raises:
        argparse.ArgumentTypeError: if the value is empty/blank or names a CLI
            that is not a key of CLI_PROFILES.
    """
    if not isinstance(value, str) or not value.strip():
        raise argparse.ArgumentTypeError("--fixpr-agent must be a non-empty string")

    entries = [token.strip().lower() for token in value.split(",") if token.strip()]
    if not entries:
        raise argparse.ArgumentTypeError("--fixpr-agent chain is empty")

    invalid = [name for name in entries if name not in CLI_PROFILES]
    if invalid:
        raise argparse.ArgumentTypeError(
            f"Invalid --fixpr-agent CLI(s): {invalid}. Must be subset of {list(CLI_PROFILES.keys())}"
        )

    # dict.fromkeys preserves insertion order, so this de-duplicates while
    # keeping the first occurrence of each CLI name.
    return ",".join(dict.fromkeys(entries))
96
+
97
+
98
+ def _positive_int_arg(value: str) -> int:
99
+ try:
100
+ parsed = int(value)
101
+ except (TypeError, ValueError) as exc:
102
+ raise argparse.ArgumentTypeError(f"Expected integer, got {value!r}") from exc
103
+ if parsed <= 0:
104
+ raise argparse.ArgumentTypeError(f"Value must be >= 1, got {parsed}")
105
+ return parsed
106
+
107
+
108
+ def _normalize_model(model: Optional[str]) -> Optional[str]:
109
+ """Return a sanitized model value compatible with orchestration/TaskDispatcher.
110
+
111
+ Rejects values that fail TaskDispatcher's model regex.
112
+ """
113
+ if model is None:
114
+ return None
115
+
116
+ raw = str(model).strip()
117
+ if not raw:
118
+ return None
119
+
120
+ if not re.fullmatch(r"[A-Za-z0-9_.-]+", raw):
121
+ raise argparse.ArgumentTypeError(
122
+ f"Invalid --model value {raw!r}. Allowed: letters, numbers, '.', '_', '-'."
123
+ )
124
+
125
+ return raw
30
126
 
31
127
 
32
128
  class JleechanorgPRMonitor:
33
129
  """Cross-organization PR monitoring with Codex automation comments"""
34
130
 
35
- @staticmethod
36
- def _redact_email(email: Optional[str]) -> Optional[str]:
131
+ def _determine_workflow_type(self, fix_comment: bool, fixpr: bool = False) -> str:
132
+ """Determine workflow type from execution context"""
133
+ if fix_comment:
134
+ return "fix_comment"
135
+ elif fixpr:
136
+ return "fixpr"
137
+ else:
138
+ return "pr_automation"
139
+
140
+ def _redact_email(self, email: Optional[str]) -> Optional[str]:
37
141
  """Redact email for logging while preserving domain for debugging"""
38
- if not email or '@' not in email:
142
+ if not email or "@" not in email:
39
143
  return email
40
- user, domain = email.rsplit('@', 1)
144
+ user, domain = email.rsplit("@", 1)
41
145
  if len(user) <= 2:
42
146
  return f"***@{domain}"
43
147
  return f"{user[:2]}***@{domain}"
44
148
 
45
149
  CODEX_COMMIT_MARKER_PREFIX = SHARED_MARKER_PREFIX
46
150
  CODEX_COMMIT_MARKER_SUFFIX = SHARED_MARKER_SUFFIX
151
+ FIX_COMMENT_MARKER_PREFIX = SHARED_FIX_COMMENT_PREFIX
152
+ FIX_COMMENT_MARKER_SUFFIX = SHARED_FIX_COMMENT_SUFFIX
153
+ FIX_COMMENT_RUN_MARKER_PREFIX = SHARED_FIX_COMMENT_RUN_PREFIX
154
+ FIX_COMMENT_RUN_MARKER_SUFFIX = SHARED_FIX_COMMENT_RUN_SUFFIX
155
+ FIXPR_MARKER_PREFIX = SHARED_FIXPR_PREFIX
156
+ FIXPR_MARKER_SUFFIX = SHARED_FIXPR_SUFFIX
47
157
  CODEX_COMMIT_MESSAGE_MARKER = "[codex-automation-commit]"
48
158
  CODEX_BOT_IDENTIFIER = "codex"
159
+ FIX_COMMENT_COMPLETION_MARKER = "Fix-comment automation complete"
49
160
  # GitHub short SHAs display with a minimum of 7 characters, while full SHAs are 40 characters.
50
161
  CODEX_COMMIT_SHA_LENGTH_RANGE: Tuple[int, int] = (7, 40)
51
162
  CODEX_SUMMARY_COMMIT_PATTERNS = [
@@ -104,13 +215,31 @@ class JleechanorgPRMonitor:
104
215
  for keyword in _codex_actor_keywords
105
216
  ]
106
217
  _codex_commit_message_pattern_str = (
107
- r"\[(?:" + "|".join(_codex_actor_keywords) + r")-automation-commit\]"
218
+ r"\[(?:fixpr\s+)?(?:" + "|".join(_codex_actor_keywords) + r")-automation-commit\]"
108
219
  )
109
220
  _codex_commit_message_pattern = re.compile(
110
221
  _codex_commit_message_pattern_str,
111
222
  re.IGNORECASE,
112
223
  )
113
224
 
225
+ # Known GitHub review bots that may appear without [bot] suffix in API responses.
226
+ # Note: Some bots (e.g., "coderabbitai", "copilot") appear in both this list and
227
+ # _codex_actor_keywords. This is intentional:
228
+ # - KNOWN_GITHUB_BOTS: Detects the review service (e.g., "coderabbitai" or "coderabbitai[bot]")
229
+ # whose comments should trigger PR re-processing.
230
+ # - _codex_actor_keywords: Used to exclude our own automation bots from being
231
+ # treated as external review bots when they have the [bot] suffix.
232
+ # The detection order in _is_github_bot_comment() ensures known bots are detected first.
233
+ KNOWN_GITHUB_BOTS = frozenset({
234
+ "github-actions",
235
+ "coderabbitai",
236
+ "copilot-swe-agent",
237
+ "dependabot",
238
+ "renovate",
239
+ "codecov",
240
+ "sonarcloud",
241
+ })
242
+
114
243
  @staticmethod
115
244
  def _extract_actor_fields(
116
245
  actor: Optional[Dict],
@@ -124,8 +253,9 @@ class JleechanorgPRMonitor:
124
253
  name = actor.get("name")
125
254
  return (login, email, name)
126
255
 
127
- def __init__(self):
256
+ def __init__(self, *, safety_limits: Optional[Dict[str, int]] = None, no_act: bool = False, automation_username: Optional[str] = None):
128
257
  self.logger = setup_logging(__name__)
258
+ self._explicit_automation_username = automation_username
129
259
 
130
260
  self.assistant_mentions = os.environ.get(
131
261
  "AI_ASSISTANT_MENTIONS",
@@ -133,6 +263,7 @@ class JleechanorgPRMonitor:
133
263
  )
134
264
 
135
265
  self.wrapper_managed = os.environ.get("AUTOMATION_SAFETY_WRAPPER") == "1"
266
+ self.no_act = bool(no_act)
136
267
 
137
268
  # Processing history persisted to permanent location
138
269
  self.history_base_dir = Path.home() / "Library" / "Logs" / "worldarchitect-automation" / "pr_history"
@@ -142,24 +273,30 @@ class JleechanorgPRMonitor:
142
273
  self.organization = "jleechanorg"
143
274
  self.base_project_dir = Path.home() / "projects"
144
275
 
145
- safety_data_dir = os.environ.get('AUTOMATION_SAFETY_DATA_DIR')
276
+ safety_data_dir = os.environ.get("AUTOMATION_SAFETY_DATA_DIR")
146
277
  if not safety_data_dir:
147
278
  default_dir = Path.home() / "Library" / "Application Support" / "worldarchitect-automation"
148
279
  default_dir.mkdir(parents=True, exist_ok=True)
149
280
  safety_data_dir = str(default_dir)
150
281
 
151
- self.safety_manager = AutomationSafetyManager(safety_data_dir)
282
+ self.safety_manager = AutomationSafetyManager(safety_data_dir, limits=safety_limits)
283
+
284
+ # Resolve automation username (CLI > Env > Dynamic)
285
+ # Note: CLI arg is passed via __init__ if we update signature, but for now we'll handle it
286
+ # by checking if it was set after init or passing it in.
287
+ # Actually, let's update __init__ signature to accept it.
288
+ self.automation_username = self._resolve_automation_username()
152
289
 
153
- self.logger.info(f"🏢 Initialized jleechanorg PR monitor")
290
+ self.logger.info("🏢 Initialized jleechanorg PR monitor")
154
291
  self.logger.info(f"📁 History storage: {self.history_base_dir}")
155
- self.logger.info(f"💬 Comment-only automation mode")
292
+ self.logger.info("💬 Comment-only automation mode")
156
293
    def _get_history_file(self, repo_name: str, branch_name: str) -> Path:
        """Return the JSON history-file path for a specific repo/branch.

        Side effect: creates the per-repo directory beneath
        ``self.history_base_dir`` if it does not already exist.
        """
        repo_dir = self.history_base_dir / repo_name
        repo_dir.mkdir(parents=True, exist_ok=True)

        # Replace slashes in branch names to avoid creating nested directories
        safe_branch_name = branch_name.replace("/", "_")
        return repo_dir / f"{safe_branch_name}.json"
164
301
 
165
302
  def _load_branch_history(self, repo_name: str, branch_name: str) -> Dict[str, str]:
@@ -192,6 +329,37 @@ class JleechanorgPRMonitor:
192
329
  self.logger.info(f"⏭️ Skipping PR {repo_name}/{branch_name}#{pr_number} - already processed commit {current_commit[:8]}")
193
330
  return True
194
331
 
332
    def _resolve_automation_username(self) -> str:
        """Resolve the automation username from multiple sources.

        Precedence: explicit constructor argument (CLI) > AUTOMATION_USERNAME
        environment variable > the currently authenticated GitHub CLI user
        (``gh api user``). Returns the literal string "unknown" when every
        source fails.
        """
        # 1. Explicitly passed (CLI)
        if self._explicit_automation_username:
            self.logger.debug(f"👤 Using explicit automation username: {self._explicit_automation_username}")
            return self._explicit_automation_username

        # 2. Environment variable
        env_user = os.environ.get("AUTOMATION_USERNAME")
        if env_user:
            self.logger.debug(f"👤 Using environment automation username: {env_user}")
            return env_user

        # 3. Dynamic discovery from GitHub CLI (best-effort network call;
        # any failure falls through to the "unknown" default below).
        try:
            result = AutomationUtils.execute_subprocess_with_timeout(
                ["gh", "api", "user", "--jq", ".login"],
                timeout=10,
                check=False
            )
            if result.returncode == 0 and result.stdout.strip():
                user = result.stdout.strip()
                self.logger.debug(f"👤 Discovered current GitHub user: {user}")
                return user
        except Exception as e:
            self.logger.warning(f"⚠️ Failed to discover GitHub user: {e}")

        # Fallback (should ideally not happen in real usage, but safe default)
        self.logger.warning("⚠️ Could not resolve automation username, defaulting to 'unknown'")
        return "unknown"
362
+
195
363
  def _record_processed_pr(self, repo_name: str, branch_name: str, pr_number: int, commit_sha: str) -> None:
196
364
  """Record that we've processed a PR with a specific commit"""
197
365
  history = self._load_branch_history(repo_name, branch_name)
@@ -219,23 +387,53 @@ class JleechanorgPRMonitor:
219
387
    def is_pr_actionable(self, pr_data: Dict) -> bool:
        """Determine if a PR is actionable (should be processed).

        A PR is actionable when it is open, not a draft, has a head commit,
        and either (a) that commit has not been processed yet, or (b) it has
        been processed but new bot comments arrived since the last Codex run.
        """
        # Closed PRs are not actionable
        if pr_data.get("state", "").lower() != "open":
            return False

        # Draft PRs are not actionable for automation
        if pr_data.get("isDraft"):
            return False

        # PRs with no commits are not actionable
        head_ref_oid = pr_data.get("headRefOid")
        if not head_ref_oid:
            return False

        # Check if already processed with this commit
        repo_name = pr_data.get("repository", "")
        branch_name = pr_data.get("headRefName", "")
        pr_number = pr_data.get("number", 0)

        if self._should_skip_pr(repo_name, branch_name, pr_number, head_ref_oid):
            # Even if commit was processed, check for new bot comments that need attention
            repo_full = pr_data.get("repositoryFullName") or ""

            if not repo_full:
                if repo_name:
                    repo_full = self._normalize_repository_name(repo_name)
                else:
                    self.logger.warning(
                        "Skipping PR comment state check: missing repository information "
                        f"(pr_number={pr_number})"
                    )
                    return False

            # Validate "owner/name" shape before hitting the GitHub API.
            owner_repo = repo_full.split("/", 1)
            if len(owner_repo) != 2 or not owner_repo[0].strip() or not owner_repo[1].strip():
                self.logger.warning(
                    "Skipping PR comment state check due to invalid repository identifier "
                    f"repo_full='{repo_full}' (pr_number={pr_number})"
                )
                return False
            _, comments = self._get_pr_comment_state(repo_full, pr_number)
            if self._has_new_bot_comments_since_codex(comments):
                self.logger.info(
                    f"🤖 PR {repo_name}#{pr_number} has new bot comments since last processing - marking actionable"
                )
                return True
            return False

        # Open non-draft PRs with new commits are actionable
        return True
240
438
 
241
439
  def filter_eligible_prs(self, pr_list: List[Dict]) -> List[Dict]:
@@ -268,12 +466,64 @@ class JleechanorgPRMonitor:
268
466
  eligible_prs = self.filter_eligible_prs(all_prs)
269
467
  return eligible_prs[:limit]
270
468
 
469
    def list_actionable_prs(self, cutoff_hours: int = 24, max_prs: int = 20, mode: str = "fixpr", single_repo: Optional[str] = None) -> List[Dict]:
        """
        Return PRs that would be processed for fixpr (merge conflicts or failing checks).

        Args:
            cutoff_hours: Only consider PRs updated within this many hours.
            max_prs: Cap on the number of PRs returned.
            mode: NOTE(review): currently unused in this body — presumably
                reserved for future workflow selection; confirm with callers.
            single_repo: When set, restrict listing to this repository name.

        Returns:
            Up to ``max_prs`` PR dicts, each augmented with a ``repo_full``
            ("owner/name") key. Also prints a human-readable summary to stdout.
        """
        try:
            prs = self.discover_open_prs(cutoff_hours=cutoff_hours)
        except TypeError:
            # Backwards-compatible with older stubs/mocks that don't accept cutoff_hours.
            prs = self.discover_open_prs()
        if single_repo:
            prs = [pr for pr in prs if pr.get("repository") == single_repo]

        actionable = []
        for pr in prs:
            repo = pr.get("repository")
            owner = pr.get("owner", "jleechanorg")
            pr_number = pr.get("number")
            if not repo or pr_number is None:
                continue
            repo_full = f"{owner}/{repo}"

            # Lazily fetch mergeability via `gh pr view` only when the
            # discovery payload did not already include it.
            mergeable = pr.get("mergeable")
            if mergeable is None:
                try:
                    result = AutomationUtils.execute_subprocess_with_timeout(
                        ["gh", "pr", "view", str(pr_number), "--repo", repo_full, "--json", "mergeable"],
                        timeout=30,
                        check=False,
                    )
                    if result.returncode == 0:
                        data = json.loads(result.stdout or "{}")
                        mergeable = data.get("mergeable")
                except Exception:
                    # Best-effort: treat fetch failure as "unknown" mergeability.
                    mergeable = None

            if mergeable == "CONFLICTING":
                actionable.append({**pr, "repo_full": repo_full})
                continue

            try:
                if has_failing_checks(repo_full, pr_number):
                    actionable.append({**pr, "repo_full": repo_full})
            except Exception:
                # Skip on error to avoid blocking listing
                continue

        actionable = actionable[:max_prs]
        print(f"🔎 Eligible for fixpr: {len(actionable)}")
        for pr in actionable:
            print(f"  • {pr.get('repository')} PR #{pr.get('number')}: {pr.get('title')}")
        return actionable
520
+
271
521
  def run_monitoring_cycle_with_actionable_count(self, target_actionable_count: int = 20) -> Dict:
272
522
  """Enhanced monitoring cycle that processes exactly target actionable PRs"""
273
523
  all_prs = self.discover_open_prs()
274
524
 
275
525
  # Sort by most recently updated first
276
- all_prs.sort(key=lambda pr: pr.get('updatedAt', ''), reverse=True)
526
+ all_prs.sort(key=lambda pr: pr.get("updatedAt", ""), reverse=True)
277
527
 
278
528
  actionable_processed = 0
279
529
  skipped_count = 0
@@ -293,9 +543,9 @@ class JleechanorgPRMonitor:
293
543
  continue # Already counted in skipped above
294
544
 
295
545
  # Attempt to process the PR
296
- repo_name = pr.get('repository', '')
297
- pr_number = pr.get('number', 0)
298
- repo_full = pr.get('repositoryFullName', f"jleechanorg/{repo_name}")
546
+ repo_name = pr.get("repository", "")
547
+ pr_number = pr.get("number", 0)
548
+ repo_full = pr.get("repositoryFullName", f"jleechanorg/{repo_name}")
299
549
 
300
550
  # Reserve a processing slot for this PR
301
551
  if not self.safety_manager.try_process_pr(pr_number, repo=repo_full):
@@ -317,17 +567,17 @@ class JleechanorgPRMonitor:
317
567
  self.safety_manager.release_pr_slot(pr_number, repo=repo_full)
318
568
 
319
569
  return {
320
- 'actionable_processed': actionable_processed,
321
- 'total_discovered': len(all_prs),
322
- 'skipped_count': skipped_count,
323
- 'processing_failures': processing_failures
570
+ "actionable_processed": actionable_processed,
571
+ "total_discovered": len(all_prs),
572
+ "skipped_count": skipped_count,
573
+ "processing_failures": processing_failures
324
574
  }
325
575
 
326
576
  def _process_pr_comment(self, repo_name: str, pr_number: int, pr_data: Dict) -> bool:
327
577
  """Process a PR by posting a comment (used by tests and enhanced monitoring)"""
328
578
  try:
329
579
  # Use the existing comment posting method
330
- repo_full_name = pr_data.get('repositoryFullName', f"jleechanorg/{repo_name}")
580
+ repo_full_name = pr_data.get("repositoryFullName", f"jleechanorg/{repo_name}")
331
581
  result = self.post_codex_instruction_simple(repo_full_name, pr_number, pr_data)
332
582
  # Return True only if comment was actually posted
333
583
  return result == "posted"
@@ -335,16 +585,16 @@ class JleechanorgPRMonitor:
335
585
  self.logger.error(f"Error processing comment for PR {repo_name}#{pr_number}: {e}")
336
586
  return False
337
587
 
338
- def discover_open_prs(self) -> List[Dict]:
339
- """Discover open PRs updated in the last 24 hours across the organization."""
588
+ def discover_open_prs(self, cutoff_hours: int = 24) -> List[Dict]:
589
+ """Discover open PRs updated in the last specified hours across the organization."""
340
590
 
341
- self.logger.info(f"🔍 Discovering open PRs in {self.organization} organization (last 24 hours)")
591
+ self.logger.info(f"🔍 Discovering open PRs in {self.organization} organization (last {cutoff_hours} hours)")
342
592
 
343
593
  now = datetime.utcnow()
344
- one_day_ago = now - timedelta(hours=24)
345
- self.logger.info("📅 Filtering PRs updated since: %s UTC", one_day_ago.strftime('%Y-%m-%d %H:%M:%S'))
594
+ one_day_ago = now - timedelta(hours=cutoff_hours)
595
+ self.logger.info("📅 Filtering PRs updated since: %s UTC", one_day_ago.strftime("%Y-%m-%d %H:%M:%S"))
346
596
 
347
- graphql_query = '''
597
+ graphql_query = """
348
598
  query($searchQuery: String!, $cursor: String) {
349
599
  search(type: ISSUE, query: $searchQuery, first: 100, after: $cursor) {
350
600
  nodes {
@@ -366,7 +616,7 @@ class JleechanorgPRMonitor:
366
616
  pageInfo { hasNextPage endCursor }
367
617
  }
368
618
  }
369
- '''
619
+ """
370
620
 
371
621
  search_query = f"org:{self.organization} is:pr is:open"
372
622
  cursor: Optional[str] = None
@@ -395,57 +645,57 @@ class JleechanorgPRMonitor:
395
645
  self.logger.error("❌ Failed to parse GraphQL response: %s", exc)
396
646
  raise
397
647
 
398
- search_data = api_data.get('data', {}).get('search')
648
+ search_data = api_data.get("data", {}).get("search")
399
649
  if not search_data:
400
650
  break
401
651
 
402
- nodes = search_data.get('nodes', [])
652
+ nodes = search_data.get("nodes", [])
403
653
  for node in nodes:
404
- if node.get('__typename') != 'PullRequest':
654
+ if node.get("__typename") != "PullRequest":
405
655
  continue
406
656
 
407
- updated_str = node.get('updatedAt')
657
+ updated_str = node.get("updatedAt")
408
658
  if not updated_str:
409
659
  continue
410
660
 
411
661
  try:
412
- updated_time = datetime.fromisoformat(updated_str.replace('Z', '+00:00')).replace(tzinfo=None)
662
+ updated_time = datetime.fromisoformat(updated_str.replace("Z", "+00:00")).replace(tzinfo=None)
413
663
  except ValueError:
414
664
  self.logger.debug(
415
- "⚠️ Invalid date format for PR %s: %s", node.get('number'), updated_str
665
+ "⚠️ Invalid date format for PR %s: %s", node.get("number"), updated_str
416
666
  )
417
667
  continue
418
668
 
419
669
  if updated_time < one_day_ago:
420
670
  continue
421
671
 
422
- repo_info = node.get('repository') or {}
423
- author_info = node.get('author') or {}
424
- if 'login' not in author_info:
425
- author_info = {**author_info, 'login': author_info.get('login')}
672
+ repo_info = node.get("repository") or {}
673
+ author_info = node.get("author") or {}
674
+ if "login" not in author_info:
675
+ author_info = {**author_info, "login": author_info.get("login")}
426
676
 
427
677
  normalized = {
428
- 'number': node.get('number'),
429
- 'title': node.get('title'),
430
- 'headRefName': node.get('headRefName'),
431
- 'baseRefName': node.get('baseRefName'),
432
- 'updatedAt': updated_str,
433
- 'url': node.get('url'),
434
- 'author': author_info,
435
- 'headRefOid': node.get('headRefOid'),
436
- 'state': node.get('state'),
437
- 'isDraft': node.get('isDraft'),
438
- 'repository': repo_info.get('name'),
439
- 'repositoryFullName': repo_info.get('nameWithOwner'),
440
- 'updated_datetime': updated_time,
678
+ "number": node.get("number"),
679
+ "title": node.get("title"),
680
+ "headRefName": node.get("headRefName"),
681
+ "baseRefName": node.get("baseRefName"),
682
+ "updatedAt": updated_str,
683
+ "url": node.get("url"),
684
+ "author": author_info,
685
+ "headRefOid": node.get("headRefOid"),
686
+ "state": node.get("state"),
687
+ "isDraft": node.get("isDraft"),
688
+ "repository": repo_info.get("name"),
689
+ "repositoryFullName": repo_info.get("nameWithOwner"),
690
+ "updated_datetime": updated_time,
441
691
  }
442
692
  recent_prs.append(normalized)
443
693
 
444
- page_info = search_data.get('pageInfo') or {}
445
- if not page_info.get('hasNextPage'):
694
+ page_info = search_data.get("pageInfo") or {}
695
+ if not page_info.get("hasNextPage"):
446
696
  break
447
697
 
448
- cursor = page_info.get('endCursor')
698
+ cursor = page_info.get("endCursor")
449
699
  if not cursor:
450
700
  break
451
701
 
@@ -453,9 +703,9 @@ class JleechanorgPRMonitor:
453
703
  self.logger.info("📭 No recent open PRs discovered")
454
704
  return []
455
705
 
456
- recent_prs.sort(key=lambda x: x.get('updated_datetime', datetime.min), reverse=True)
706
+ recent_prs.sort(key=lambda x: x.get("updated_datetime", datetime.min), reverse=True)
457
707
 
458
- repo_counter = Counter(pr.get('repository') for pr in recent_prs if pr.get('repository'))
708
+ repo_counter = Counter(pr.get("repository") for pr in recent_prs if pr.get("repository"))
459
709
  for repo_name, count in repo_counter.items():
460
710
  self.logger.info("📋 %s: %s recent PRs", repo_name, count)
461
711
 
@@ -463,8 +713,8 @@ class JleechanorgPRMonitor:
463
713
 
464
714
  self.logger.info("📊 Most recently updated PRs:")
465
715
  for i, pr in enumerate(recent_prs[:5], 1):
466
- updated_str = pr['updated_datetime'].strftime('%Y-%m-%d %H:%M')
467
- self.logger.info(" %s. %s #%s - %s", i, pr['repositoryFullName'], pr['number'], updated_str)
716
+ updated_str = pr["updated_datetime"].strftime("%Y-%m-%d %H:%M")
717
+ self.logger.info(" %s. %s #%s - %s", i, pr["repositoryFullName"], pr["number"], updated_str)
468
718
 
469
719
  return recent_prs
470
720
 
@@ -533,30 +783,48 @@ class JleechanorgPRMonitor:
533
783
  repo_full = self._normalize_repository_name(repository)
534
784
  self.logger.info(f"💬 Requesting Codex support for {repo_full} PR #{pr_number}")
535
785
 
786
+ if self.no_act:
787
+ self.logger.info("🧪 --no-act enabled: skipping comment post for %s #%s", repo_full, pr_number)
788
+ return "skipped"
789
+
536
790
  # Extract repo name and branch from PR data
537
- repo_name = repo_full.split('/')[-1]
538
- branch_name = pr_data.get('headRefName', 'unknown')
791
+ repo_name = repo_full.split("/")[-1]
792
+ branch_name = pr_data.get("headRefName", "unknown")
539
793
 
540
794
  # Get current PR state including commit SHA
541
795
  head_sha, comments = self._get_pr_comment_state(repo_full, pr_number)
542
796
  head_commit_details = None
797
+ # Flag to bypass skip checks when new bot comments require attention
798
+ force_process_due_to_bot_comments = False
799
+
543
800
  if head_sha:
544
801
  head_commit_details = self._get_head_commit_details(repo_full, pr_number, head_sha)
545
802
  if head_commit_details and self._is_head_commit_from_codex(head_commit_details):
546
- self.logger.debug(
547
- "🆔 Head commit %s for %s#%s already attributed to Codex",
548
- head_sha[:8],
549
- repo_full,
550
- pr_number,
551
- )
552
- self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
553
- return "skipped"
803
+ # Check if there are new bot comments that need attention
804
+ if self._has_new_bot_comments_since_codex(comments):
805
+ self.logger.info(
806
+ "🤖 Head commit %s for %s#%s is from Codex, but new bot comments detected - forcing re-run",
807
+ head_sha[:8],
808
+ repo_full,
809
+ pr_number,
810
+ )
811
+ force_process_due_to_bot_comments = True
812
+ else:
813
+ self.logger.debug(
814
+ "🆔 Head commit %s for %s#%s already attributed to Codex",
815
+ head_sha[:8],
816
+ repo_full,
817
+ pr_number,
818
+ )
819
+ self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
820
+ return "skipped"
554
821
 
555
822
  if not head_sha:
556
823
  self.logger.warning(
557
824
  f"⚠️ Could not determine commit SHA for PR #{pr_number}; proceeding without marker gating"
558
825
  )
559
- else:
826
+ elif not force_process_due_to_bot_comments:
827
+ # Only apply skip checks if we're not forcing a re-run due to new bot comments
560
828
  # Check if we should skip this PR based on commit-based tracking
561
829
  if self._should_skip_pr(repo_name, branch_name, pr_number, head_sha):
562
830
  self.logger.info(f"⏭️ Skipping PR #{pr_number} - already processed this commit")
@@ -592,9 +860,21 @@ class JleechanorgPRMonitor:
592
860
  "--body", comment_body
593
861
  ]
594
862
 
595
- result = AutomationUtils.execute_subprocess_with_timeout(comment_cmd, timeout=30)
863
+ result = AutomationUtils.execute_subprocess_with_timeout(
864
+ comment_cmd,
865
+ timeout=30,
866
+ retry_attempts=5,
867
+ retry_backoff_seconds=1.0,
868
+ retry_backoff_multiplier=2.0,
869
+ retry_on_stderr_substrings=(
870
+ "was submitted too quickly",
871
+ "secondary rate limit",
872
+ "API rate limit exceeded",
873
+ ),
874
+ )
596
875
 
597
876
  self.logger.info(f"✅ Posted Codex instruction comment on PR #{pr_number} ({repo_full})")
877
+ time.sleep(2.0)
598
878
 
599
879
  # Record that we've processed this PR with this commit when available
600
880
  if head_sha:
@@ -630,7 +910,7 @@ class JleechanorgPRMonitor:
630
910
  ], timeout=30)
631
911
 
632
912
  pr_status = json.loads(result.stdout)
633
- status_checks = pr_status.get('statusCheckRollup', [])
913
+ status_checks = pr_status.get("statusCheckRollup", [])
634
914
 
635
915
  # If no status checks are configured, assume tests are failing
636
916
  if not status_checks:
@@ -639,7 +919,7 @@ class JleechanorgPRMonitor:
639
919
 
640
920
  # Check if all status checks are successful
641
921
  for check in status_checks:
642
- if check.get('state') not in ['SUCCESS', 'NEUTRAL']:
922
+ if check.get("state") not in ["SUCCESS", "NEUTRAL"]:
643
923
  self.logger.debug(f"❌ Status check failed: {check.get('name')} - {check.get('state')}")
644
924
  return False
645
925
 
@@ -697,127 +977,790 @@ Use your judgment to fix comments from everyone or explain why it should not be
697
977
 
698
978
  return comment_body
699
979
 
700
- def _get_pr_comment_state(self, repo_full_name: str, pr_number: int) -> Tuple[Optional[str], List[Dict]]:
701
- """Fetch PR comment data needed for Codex comment gating"""
702
- view_cmd = [
703
- "gh",
704
- "pr",
705
- "view",
706
- str(pr_number),
707
- "--repo",
708
- repo_full_name,
709
- "--json",
710
- "headRefOid,comments",
980
+ def _compose_fix_comment_mentions(self) -> str:
981
+ mentions = [
982
+ token for token in (self.assistant_mentions or "").split() if token.startswith("@")
711
983
  ]
984
+ lower_mentions = {token.lower() for token in mentions}
985
+ if "@greptile" not in lower_mentions:
986
+ mentions.append("@greptile")
987
+ return " ".join(mentions)
712
988
 
713
- try:
714
- result = AutomationUtils.execute_subprocess_with_timeout(
715
- view_cmd,
716
- timeout=30
717
- )
718
- pr_data = json.loads(result.stdout or "{}")
719
- head_sha = pr_data.get("headRefOid")
720
-
721
- # Handle different comment structures from GitHub API
722
- comments_data = pr_data.get("comments", [])
723
- if isinstance(comments_data, dict):
724
- comments = comments_data.get("nodes", [])
725
- elif isinstance(comments_data, list):
726
- comments = comments_data
727
- else:
728
- comments = []
989
+ def _build_fix_comment_prompt_body(
990
+ self,
991
+ repository: str,
992
+ pr_number: int,
993
+ pr_data: Dict,
994
+ head_sha: Optional[str],
995
+ agent_cli: str,
996
+ ) -> str:
997
+ cli_chain = [part.strip().lower() for part in str(agent_cli).split(",") if part.strip()]
998
+ commit_marker_cli = cli_chain[0] if cli_chain else "claude"
999
+
1000
+ return (
1001
+ f"FIX-COMMENT TASK (SELF-CONTAINED): Update PR #{pr_number} in {repository} "
1002
+ f"(branch {pr_data.get('headRefName', 'unknown')}).\n"
1003
+ "Goal: address all review comments with explicit action-based replies, "
1004
+ "fix failing tests, and resolve merge conflicts.\n\n"
1005
+ f"CLI chain: {agent_cli}. Start immediately.\n\n"
1006
+ "Steps:\n"
1007
+ f"1) gh pr checkout {pr_number}\n\n"
1008
+ "2) Fetch ALL PR feedback sources (pagination-safe) using correct GitHub API endpoints:\n"
1009
+ f" - Issue comments: `gh api /repos/{repository}/issues/{pr_number}/comments --paginate -F per_page=100`\n"
1010
+ f" - Review summaries: `gh api /repos/{repository}/pulls/{pr_number}/reviews --paginate -F per_page=100`\n"
1011
+ f" - Inline review comments: `gh api /repos/{repository}/pulls/{pr_number}/comments --paginate -F per_page=100`\n"
1012
+ " ⚠️ WARNING: `gh pr view --json comments` ONLY fetches issue comments, NOT inline review comments.\n\n"
1013
+ "3) Apply code changes to address feedback, then reply to **100%** of comments INDIVIDUALLY.\n"
1014
+ " Threading rules:\n"
1015
+ " - Inline review comments MUST use threaded replies via the GitHub API.\n"
1016
+ " - Issue/PR comments do not support threading (they are top-level only).\n\n"
1017
+ " **Response Protocol** (use ONE of these categories for EACH comment):\n"
1018
+ " - **FIXED**: Issue implemented with working code → include files modified, tests added, verification\n"
1019
+ " - **DEFERRED**: Created issue for future work → include issue URL and reason\n"
1020
+ " - **ACKNOWLEDGED**: Noted but not actionable → include explanation\n"
1021
+ " - **NOT DONE**: Cannot implement → include specific technical reason\n\n"
1022
+ " **Reply Methods**:\n"
1023
+ f" - Inline review comments: `gh api /repos/{repository}/pulls/{pr_number}/comments/{{comment_id}}/replies -f body='[Response text]'`\n"
1024
+ f" - Issue/PR comments: `gh pr comment {pr_number} --body '[Response text]'`\n"
1025
+ " - Do NOT post mega-comments consolidating multiple responses; reply individually to each comment.\n\n"
1026
+ "4) Run tests and fix failures (block completion on critical/blocking test failures)\n\n"
1027
+ "5) Resolve merge conflicts (prefer merge over rebase)\n\n"
1028
+ f"6) git add -A && git commit -m \"[{commit_marker_cli}-automation-commit] fix PR #{pr_number} review feedback\" && git push\n\n"
1029
+ f"7) Write completion report to /tmp/orchestration_results/pr-{pr_number}_results.json "
1030
+ "with comments addressed, files modified, tests run, and remaining issues\n\n"
1031
+ "**PR Details:**\n"
1032
+ f"- Title: {pr_data.get('title', 'Unknown')}\n"
1033
+ f"- Author: {pr_data.get('author', {}).get('login', 'unknown')}\n"
1034
+ f"- Branch: {pr_data.get('headRefName', 'unknown')}\n"
1035
+ f"- Commit: {head_sha[:8] if head_sha else 'unknown'} ({head_sha or 'unknown'})\n"
1036
+ )
729
1037
 
730
- # Ensure comments are sorted by creation time (oldest first)
731
- # GitHub API should return them sorted, but let's be explicit
732
- comments.sort(
733
- key=lambda c: (c.get('createdAt') or c.get('updatedAt') or '')
734
- )
1038
+ def _build_fix_comment_queued_body(
1039
+ self,
1040
+ repository: str,
1041
+ pr_number: int,
1042
+ pr_data: Dict,
1043
+ head_sha: Optional[str],
1044
+ agent_cli: str = "claude",
1045
+ ) -> str:
1046
+ comment_body = (
1047
+ f"[AI automation - {agent_cli}] Fix-comment run queued for this PR. "
1048
+ "A review request will follow after updates are pushed.\n\n"
1049
+ "**PR Details:**\n"
1050
+ f"- Title: {pr_data.get('title', 'Unknown')}\n"
1051
+ f"- Author: {pr_data.get('author', {}).get('login', 'unknown')}\n"
1052
+ f"- Branch: {pr_data.get('headRefName', 'unknown')}\n"
1053
+ f"- Commit: {head_sha[:8] if head_sha else 'unknown'} ({head_sha or 'unknown'})\n"
1054
+ f"- Agent: {agent_cli}"
1055
+ )
735
1056
 
736
- return head_sha, comments
737
- except subprocess.CalledProcessError as e:
738
- error_message = e.stderr.strip() if e.stderr else str(e)
739
- self.logger.warning(
740
- f"⚠️ Failed to fetch PR comment state for PR #{pr_number}: {error_message}"
741
- )
742
- except json.JSONDecodeError as e:
743
- self.logger.warning(
744
- f"⚠️ Failed to parse PR comment state for PR #{pr_number}: {e}"
745
- )
1057
+ # Always add marker for limit counting, even when head_sha is unavailable.
1058
+ # Use enhanced format: workflow:agent:commit
1059
+ sha_value = head_sha or "unknown"
1060
+ # Extract first CLI from chain (e.g., "gemini,codex" -> "gemini")
1061
+ cli_chain = [part.strip().lower() for part in str(agent_cli).split(",") if part.strip()]
1062
+ marker_cli = cli_chain[0] if cli_chain else "claude"
1063
+ marker = build_automation_marker("fix-comment-run", marker_cli, sha_value)
1064
+ comment_body += f"\n\n{marker}"
746
1065
 
747
- return None, []
1066
+ return comment_body
748
1067
 
749
- def _get_head_commit_details(
1068
+ def _build_fixpr_queued_body(
750
1069
  self,
751
- repo_full_name: str,
1070
+ repository: str,
752
1071
  pr_number: int,
753
- expected_sha: Optional[str] = None,
754
- ) -> Optional[Dict[str, Optional[str]]]:
755
- """Fetch metadata for the PR head commit using the GitHub GraphQL API."""
1072
+ pr_data: Dict,
1073
+ head_sha: Optional[str],
1074
+ agent_cli: str = "claude",
1075
+ ) -> str:
1076
+ comment_body = (
1077
+ f"[AI automation - {agent_cli}] FixPR run queued for this PR.\n\n"
1078
+ "**PR Details:**\n"
1079
+ f"- Title: {pr_data.get('title', 'Unknown')}\n"
1080
+ f"- Author: {pr_data.get('author', {}).get('login', 'unknown')}\n"
1081
+ f"- Branch: {pr_data.get('headRefName', 'unknown')}\n"
1082
+ f"- Commit: {head_sha[:8] if head_sha else 'unknown'} ({head_sha or 'unknown'})\n"
1083
+ f"- Agent: {agent_cli}"
1084
+ )
756
1085
 
757
- if "/" not in repo_full_name:
758
- self.logger.debug(
759
- "⚠️ Cannot fetch commit details for %s - invalid repo format",
760
- repo_full_name,
761
- )
762
- return None
1086
+ # Always add marker for limit counting, even when head_sha is unavailable
1087
+ # Use enhanced format: workflow:agent:commit
1088
+ sha_value = head_sha or "unknown"
1089
+ # Extract first CLI from chain (e.g., "gemini,codex" -> "gemini")
1090
+ cli_chain = [part.strip().lower() for part in str(agent_cli).split(",") if part.strip()]
1091
+ marker_cli = cli_chain[0] if cli_chain else "claude"
1092
+ marker = build_automation_marker("fixpr-run", marker_cli, sha_value)
1093
+ comment_body += f"\n\n{marker}"
763
1094
 
764
- owner, name = repo_full_name.split("/", 1)
1095
+ return comment_body
765
1096
 
766
- # Validate GitHub naming constraints (alphanumeric, hyphens, periods, underscores, max 100 chars)
767
- import re
768
- github_name_pattern = re.compile(r'^[a-zA-Z0-9]([a-zA-Z0-9\-\._]{0,98}[a-zA-Z0-9])?$')
769
- if not github_name_pattern.match(owner) or not github_name_pattern.match(name):
770
- self.logger.warning(
771
- "⚠️ Invalid GitHub identifiers: owner='%s', name='%s'",
772
- owner,
773
- name,
774
- )
775
- return None
1097
+ def _build_fix_comment_review_body(
1098
+ self,
1099
+ repository: str,
1100
+ pr_number: int,
1101
+ pr_data: Dict,
1102
+ head_sha: Optional[str],
1103
+ agent_cli: str = "claude",
1104
+ ) -> str:
1105
+ mentions = self._compose_fix_comment_mentions()
1106
+ intro = f"{mentions} [AI automation] {self.FIX_COMMENT_COMPLETION_MARKER}. Please review the updates."
1107
+
1108
+ comment_body = (
1109
+ f"{intro}\n\n"
1110
+ "**Review Request:**\n"
1111
+ "Please review the latest changes, leave feedback, and flag any remaining issues. "
1112
+ "If further fixes are needed, add explicit DONE/NOT DONE guidance.\n\n"
1113
+ "**PR Details:**\n"
1114
+ f"- Title: {pr_data.get('title', 'Unknown')}\n"
1115
+ f"- Author: {pr_data.get('author', {}).get('login', 'unknown')}\n"
1116
+ f"- Branch: {pr_data.get('headRefName', 'unknown')}\n"
1117
+ f"- Commit: {head_sha[:8] if head_sha else 'unknown'} ({head_sha or 'unknown'})\n"
1118
+ )
776
1119
 
777
- # Validate PR number is positive integer
778
- if not isinstance(pr_number, int) or pr_number <= 0:
779
- self.logger.warning("⚠️ Invalid PR number: %s", pr_number)
780
- return None
1120
+ if head_sha:
1121
+ # Extract first CLI from chain (e.g., "gemini,codex" -> "gemini")
1122
+ cli_chain = [part.strip().lower() for part in str(agent_cli).split(",") if part.strip()]
1123
+ marker_cli = cli_chain[0] if cli_chain else "claude"
1124
+ marker = build_automation_marker("fix-comment", marker_cli, head_sha)
1125
+ comment_body += f"\n{marker}"
781
1126
 
782
- cmd = [
1127
+ return comment_body
1128
+
1129
+ def _get_fix_comment_watch_state(
1130
+ self,
1131
+ repo_full: str,
1132
+ pr_number: int,
1133
+ ) -> Tuple[Dict, Optional[str], List[Dict], List[str]]:
1134
+ view_cmd = [
783
1135
  "gh",
784
- "api",
785
- "graphql",
786
- "-f",
787
- f"query={self._HEAD_COMMIT_DETAILS_QUERY}",
788
- "-f",
789
- f"owner={owner}",
790
- "-f",
791
- f"name={name}",
792
- "-F",
793
- f"prNumber={pr_number}",
1136
+ "pr",
1137
+ "view",
1138
+ str(pr_number),
1139
+ "--repo",
1140
+ repo_full,
1141
+ "--json",
1142
+ "title,headRefName,headRefOid,author,comments,commits",
794
1143
  ]
1144
+ result = AutomationUtils.execute_subprocess_with_timeout(view_cmd, timeout=30, check=False)
1145
+ if result.returncode != 0:
1146
+ raise RuntimeError(result.stderr or f"gh pr view failed for {repo_full}#{pr_number}")
1147
+
1148
+ pr_data = json.loads(result.stdout or "{}")
1149
+ head_sha = pr_data.get("headRefOid")
1150
+
1151
+ comments_data = pr_data.get("comments", [])
1152
+ if isinstance(comments_data, dict):
1153
+ comments = comments_data.get("nodes", [])
1154
+ elif isinstance(comments_data, list):
1155
+ comments = comments_data
1156
+ else:
1157
+ comments = []
1158
+
1159
+ commits_data = pr_data.get("commits", [])
1160
+ if isinstance(commits_data, dict):
1161
+ commit_nodes = commits_data.get("nodes", [])
1162
+ elif isinstance(commits_data, list):
1163
+ commit_nodes = commits_data
1164
+ else:
1165
+ commit_nodes = []
1166
+
1167
+ headlines = []
1168
+ for node in commit_nodes:
1169
+ # Handle both nested 'commit' node structure and flat structure
1170
+ commit_obj = node.get("commit") if isinstance(node, dict) and "commit" in node else node
1171
+ headline = commit_obj.get("messageHeadline") if isinstance(commit_obj, dict) else None
1172
+ if headline:
1173
+ headlines.append(headline)
1174
+
1175
+ return pr_data, head_sha, comments, headlines
1176
+
1177
+ def _post_fix_comment_review(
1178
+ self,
1179
+ repository: str,
1180
+ pr_number: int,
1181
+ pr_data: Dict,
1182
+ head_sha: Optional[str],
1183
+ agent_cli: str = "claude",
1184
+ ) -> bool:
1185
+ repo_full = self._normalize_repository_name(repository)
1186
+ comment_body = self._build_fix_comment_review_body(
1187
+ repo_full,
1188
+ pr_number,
1189
+ pr_data,
1190
+ head_sha,
1191
+ agent_cli,
1192
+ )
795
1193
 
796
1194
  try:
797
- result = AutomationUtils.execute_subprocess_with_timeout(cmd, timeout=30)
798
- except subprocess.CalledProcessError as exc:
799
- self.logger.debug(
800
- "⚠️ Failed to fetch head commit details for %s#%s: %s",
801
- repo_full_name,
1195
+ comment_cmd = [
1196
+ "gh",
1197
+ "pr",
1198
+ "comment",
1199
+ str(pr_number),
1200
+ "--repo",
1201
+ repo_full,
1202
+ "--body",
1203
+ comment_body,
1204
+ ]
1205
+ AutomationUtils.execute_subprocess_with_timeout(
1206
+ comment_cmd,
1207
+ timeout=30,
1208
+ retry_attempts=5,
1209
+ retry_backoff_seconds=1.0,
1210
+ retry_backoff_multiplier=2.0,
1211
+ retry_on_stderr_substrings=(
1212
+ "was submitted too quickly",
1213
+ "secondary rate limit",
1214
+ "API rate limit exceeded",
1215
+ ),
1216
+ )
1217
+ self.logger.info(
1218
+ "✅ Posted fix-comment review request on PR #%s (%s)",
802
1219
  pr_number,
803
- exc.stderr or exc,
1220
+ repo_full,
804
1221
  )
805
- return None
1222
+ time.sleep(2.0)
1223
+ return True
806
1224
  except Exception as exc:
807
- self.logger.debug(
808
- "⚠️ Error executing head commit lookup for %s#%s: %s",
809
- repo_full_name,
1225
+ self.logger.error(
1226
+ " Failed to post fix-comment review request on PR #%s: %s",
810
1227
  pr_number,
811
1228
  exc,
812
1229
  )
813
- return None
1230
+ return False
814
1231
 
815
- try:
816
- data = json.loads(result.stdout or "{}")
817
- except json.JSONDecodeError as exc:
818
- self.logger.debug(
819
- "⚠️ Failed to decode commit details for %s#%s: %s",
820
- repo_full_name,
1232
+ def _post_fix_comment_queued(
1233
+ self,
1234
+ repository: str,
1235
+ pr_number: int,
1236
+ pr_data: Dict,
1237
+ head_sha: Optional[str],
1238
+ agent_cli: str = "claude",
1239
+ ) -> bool:
1240
+ repo_full = self._normalize_repository_name(repository)
1241
+ comment_body = self._build_fix_comment_queued_body(
1242
+ repo_full,
1243
+ pr_number,
1244
+ pr_data,
1245
+ head_sha,
1246
+ agent_cli=agent_cli,
1247
+ )
1248
+
1249
+ try:
1250
+ queued_cmd = [
1251
+ "gh",
1252
+ "pr",
1253
+ "comment",
1254
+ str(pr_number),
1255
+ "--repo",
1256
+ repo_full,
1257
+ "--body",
1258
+ comment_body,
1259
+ ]
1260
+ AutomationUtils.execute_subprocess_with_timeout(
1261
+ queued_cmd,
1262
+ timeout=30,
1263
+ retry_attempts=5,
1264
+ retry_backoff_seconds=1.0,
1265
+ retry_backoff_multiplier=2.0,
1266
+ retry_on_stderr_substrings=(
1267
+ "was submitted too quickly",
1268
+ "secondary rate limit",
1269
+ "API rate limit exceeded",
1270
+ ),
1271
+ )
1272
+ self.logger.info(
1273
+ "✅ Posted fix-comment queued notice on PR #%s (%s)",
1274
+ pr_number,
1275
+ repo_full,
1276
+ )
1277
+ time.sleep(2.0)
1278
+ return True
1279
+ except Exception as exc:
1280
+ self.logger.error(
1281
+ "❌ Failed to post fix-comment queued notice on PR #%s: %s",
1282
+ pr_number,
1283
+ exc,
1284
+ )
1285
+ return False
1286
+
1287
+ def _post_fixpr_queued(
1288
+ self,
1289
+ repository: str,
1290
+ pr_number: int,
1291
+ pr_data: Dict,
1292
+ head_sha: Optional[str],
1293
+ agent_cli: str = "claude",
1294
+ ) -> bool:
1295
+ repo_full = self._normalize_repository_name(repository)
1296
+ comment_body = self._build_fixpr_queued_body(
1297
+ repo_full,
1298
+ pr_number,
1299
+ pr_data,
1300
+ head_sha,
1301
+ agent_cli=agent_cli,
1302
+ )
1303
+
1304
+ try:
1305
+ queued_cmd = [
1306
+ "gh",
1307
+ "pr",
1308
+ "comment",
1309
+ str(pr_number),
1310
+ "--repo",
1311
+ repo_full,
1312
+ "--body",
1313
+ comment_body,
1314
+ ]
1315
+ AutomationUtils.execute_subprocess_with_timeout(
1316
+ queued_cmd,
1317
+ timeout=30,
1318
+ retry_attempts=5,
1319
+ retry_backoff_seconds=1.0,
1320
+ retry_backoff_multiplier=2.0,
1321
+ retry_on_stderr_substrings=(
1322
+ "was submitted too quickly",
1323
+ "secondary rate limit",
1324
+ "API rate limit exceeded",
1325
+ ),
1326
+ )
1327
+ self.logger.info(
1328
+ "✅ Posted fixpr queued notice on PR #%s (%s)",
1329
+ pr_number,
1330
+ repo_full,
1331
+ )
1332
+ time.sleep(2.0)
1333
+ return True
1334
+ except Exception as exc:
1335
+ self.logger.error(
1336
+ "❌ Failed to post fixpr queued notice on PR #%s: %s",
1337
+ pr_number,
1338
+ exc,
1339
+ )
1340
+ return False
1341
+
1342
+ def _start_fix_comment_review_watcher(
1343
+ self,
1344
+ repository: str,
1345
+ pr_number: int,
1346
+ agent_cli: str,
1347
+ ) -> bool:
1348
+ repo_full = self._normalize_repository_name(repository)
1349
+ env = os.environ.copy()
1350
+ pythonpath_parts = [str(ROOT_DIR), str(ROOT_DIR / "automation")]
1351
+ if env.get("PYTHONPATH"):
1352
+ pythonpath_parts.append(env["PYTHONPATH"])
1353
+ env["PYTHONPATH"] = ":".join(pythonpath_parts)
1354
+
1355
+ cmd = [
1356
+ sys.executable,
1357
+ "-m",
1358
+ "jleechanorg_pr_automation.jleechanorg_pr_monitor",
1359
+ "--fix-comment-watch",
1360
+ "--target-pr",
1361
+ str(pr_number),
1362
+ "--target-repo",
1363
+ repo_full,
1364
+ "--fixpr-agent",
1365
+ agent_cli,
1366
+ ]
1367
+ try:
1368
+ subprocess.Popen(
1369
+ cmd,
1370
+ cwd=str(ROOT_DIR),
1371
+ env=env,
1372
+ stdout=subprocess.DEVNULL,
1373
+ stderr=subprocess.DEVNULL,
1374
+ start_new_session=True,
1375
+ )
1376
+ self.logger.info(
1377
+ "🧭 Started fix-comment review watcher for PR #%s (%s)",
1378
+ pr_number,
1379
+ repo_full,
1380
+ )
1381
+ return True
1382
+ except Exception as exc:
1383
+ self.logger.error(
1384
+ "❌ Failed to start fix-comment review watcher for PR #%s: %s",
1385
+ pr_number,
1386
+ exc,
1387
+ )
1388
+ return False
1389
+
1390
+ def run_fix_comment_review_watcher(
1391
+ self,
1392
+ pr_number: int,
1393
+ repository: str,
1394
+ agent_cli: str = "claude",
1395
+ ) -> bool:
1396
+ repo_full = self._normalize_repository_name(repository)
1397
+ cli_chain = [part.strip().lower() for part in str(agent_cli).split(",") if part.strip()]
1398
+ commit_marker_cli = cli_chain[0] if cli_chain else "claude"
1399
+ commit_marker = f"[{commit_marker_cli}-automation-commit]"
1400
+ timeout_seconds = int(os.environ.get("FIX_COMMENT_WATCH_TIMEOUT", "3600"))
1401
+ poll_interval = float(os.environ.get("FIX_COMMENT_WATCH_POLL", "30"))
1402
+ deadline = time.time() + timeout_seconds
1403
+
1404
+ while time.time() < deadline:
1405
+ try:
1406
+ pr_data, head_sha, comments, headlines = self._get_fix_comment_watch_state(
1407
+ repo_full,
1408
+ pr_number,
1409
+ )
1410
+ except Exception as exc:
1411
+ self.logger.warning(
1412
+ "⚠️ Fix-comment watcher failed to fetch PR state for #%s: %s",
1413
+ pr_number,
1414
+ exc,
1415
+ )
1416
+ time.sleep(poll_interval)
1417
+ continue
1418
+
1419
+ if head_sha and self._has_fix_comment_comment_for_commit(comments, head_sha):
1420
+ self.logger.info(
1421
+ "✅ Fix-comment review already posted for PR #%s",
1422
+ pr_number,
1423
+ )
1424
+ return True
1425
+
1426
+ if any(commit_marker in headline for headline in headlines):
1427
+ if self._post_fix_comment_review(repo_full, pr_number, pr_data, head_sha, agent_cli):
1428
+ return True
1429
+ return False
1430
+
1431
+ time.sleep(poll_interval)
1432
+
1433
+ self.logger.warning(
1434
+ "⏳ Fix-comment watcher timed out for PR #%s after %ss",
1435
+ pr_number,
1436
+ timeout_seconds,
1437
+ )
1438
+ return False
1439
+
1440
+ def dispatch_fix_comment_agent(
1441
+ self,
1442
+ repository: str,
1443
+ pr_number: int,
1444
+ pr_data: Dict,
1445
+ agent_cli: str = "claude",
1446
+ model: str = None,
1447
+ ) -> bool:
1448
+ repo_full = self._normalize_repository_name(repository)
1449
+ repo_name = repo_full.split("/")[-1]
1450
+ branch = pr_data.get("headRefName", "")
1451
+ if not branch:
1452
+ branch = f"pr-{pr_number}"
1453
+
1454
+ head_sha = pr_data.get("headRefOid")
1455
+ task_description = self._build_fix_comment_prompt_body(
1456
+ repo_full,
1457
+ pr_number,
1458
+ pr_data,
1459
+ head_sha,
1460
+ agent_cli,
1461
+ )
1462
+
1463
+ pr_payload = {
1464
+ "repo_full": repo_full,
1465
+ "repo": repo_name,
1466
+ "number": pr_number,
1467
+ "branch": branch,
1468
+ }
1469
+
1470
+ try:
1471
+ base_dir = ensure_base_clone(repo_full)
1472
+ with chdir(base_dir):
1473
+ dispatcher = TaskDispatcher()
1474
+ return dispatch_agent_for_pr_with_task(
1475
+ dispatcher,
1476
+ pr_payload,
1477
+ task_description,
1478
+ agent_cli=agent_cli,
1479
+ model=model,
1480
+ )
1481
+ except Exception as exc:
1482
+ self.logger.error(
1483
+ "❌ Failed to dispatch fix-comment agent for %s #%s: %s",
1484
+ repo_full,
1485
+ pr_number,
1486
+ exc,
1487
+ )
1488
+ return False
1489
+
1490
+ def _process_pr_fix_comment(
1491
+ self,
1492
+ repository: str,
1493
+ pr_number: int,
1494
+ pr_data: Dict,
1495
+ agent_cli: str = "claude",
1496
+ model: str = None,
1497
+ ) -> str:
1498
+ repo_full = self._normalize_repository_name(repository)
1499
+ repo_name = repo_full.split("/")[-1]
1500
+ branch_name = pr_data.get("headRefName", "unknown")
1501
+
1502
+ if self.no_act:
1503
+ self.logger.info("🧪 --no-act enabled: skipping fix-comment dispatch for %s #%s", repo_full, pr_number)
1504
+ return "skipped"
1505
+
1506
+ head_sha, comments = self._get_pr_comment_state(repo_full, pr_number)
1507
+ if head_sha and pr_data.get("headRefOid") != head_sha:
1508
+ pr_data = {**pr_data, "headRefOid": head_sha}
1509
+
1510
+ # Check workflow-specific safety limits for fix-comment
1511
+ fix_comment_count = self._count_workflow_comments(comments, "fix_comment")
1512
+ if fix_comment_count >= self.safety_manager.fix_comment_limit:
1513
+ self.logger.info(
1514
+ f"🚫 Safety limits exceeded for PR {repo_full} #{pr_number} (fix-comment); "
1515
+ f"{fix_comment_count}/{self.safety_manager.fix_comment_limit} fix-comment automation comments"
1516
+ )
1517
+ return "skipped"
1518
+
1519
+ # Check if we've already processed this PR with this commit
1520
+ if head_sha and self._should_skip_pr(repo_name, branch_name, pr_number, head_sha):
1521
+ self.logger.info(
1522
+ "⏭️ Skipping PR #%s - already processed commit %s",
1523
+ pr_number,
1524
+ head_sha[:8],
1525
+ )
1526
+ return "skipped"
1527
+
1528
+ if head_sha and self._has_fix_comment_comment_for_commit(comments, head_sha):
1529
+ self.logger.info(
1530
+ "♻️ Fix-comment automation already posted for commit %s on PR #%s, skipping",
1531
+ head_sha[:8],
1532
+ pr_number,
1533
+ )
1534
+ self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
1535
+ return "skipped"
1536
+
1537
+ if not self.dispatch_fix_comment_agent(repo_full, pr_number, pr_data, agent_cli=agent_cli, model=model):
1538
+ return "failed"
1539
+
1540
+ queued_posted = self._post_fix_comment_queued(repo_full, pr_number, pr_data, head_sha, agent_cli=agent_cli)
1541
+
1542
+ if head_sha:
1543
+ self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
1544
+
1545
+ if not self._start_fix_comment_review_watcher(
1546
+ repo_full,
1547
+ pr_number,
1548
+ agent_cli=agent_cli,
1549
+ ):
1550
+ self.logger.warning(
1551
+ "⚠️ Failed to start review watcher for PR #%s, but agent is dispatched",
1552
+ pr_number,
1553
+ )
1554
+ return "failed"
1555
+
1556
+ if not queued_posted:
1557
+ self.logger.warning(
1558
+ "⚠️ Queued comment failed for PR #%s, but agent and watcher are running",
1559
+ pr_number,
1560
+ )
1561
+ return "partial"
1562
+
1563
+ return "posted"
1564
+
1565
+ def _process_pr_fixpr(
1566
+ self,
1567
+ repository: str,
1568
+ pr_number: int,
1569
+ pr_data: Dict,
1570
+ agent_cli: str = "claude",
1571
+ model: Optional[str] = None,
1572
+ ) -> str:
1573
+ repo_full = self._normalize_repository_name(repository)
1574
+ repo_name = repo_full.split("/")[-1]
1575
+ branch_name = pr_data.get("headRefName", "unknown")
1576
+
1577
+ if self.no_act:
1578
+ self.logger.info("🧪 --no-act enabled: skipping fixpr dispatch for %s #%s", repo_full, pr_number)
1579
+ return "skipped"
1580
+
1581
+ head_sha, comments = self._get_pr_comment_state(repo_full, pr_number)
1582
+ if head_sha and pr_data.get("headRefOid") != head_sha:
1583
+ pr_data = {**pr_data, "headRefOid": head_sha}
1584
+
1585
+ # Check workflow-specific safety limits for fixpr
1586
+ fixpr_count = self._count_workflow_comments(comments, "fixpr")
1587
+ if fixpr_count >= self.safety_manager.fixpr_limit:
1588
+ self.logger.info(
1589
+ f"🚫 Safety limits exceeded for PR {repo_full} #{pr_number} (fixpr); "
1590
+ f"{fixpr_count}/{self.safety_manager.fixpr_limit} fixpr automation comments"
1591
+ )
1592
+ return "skipped"
1593
+
1594
+ # Check if we've already processed this PR with this commit (reusing skip logic)
1595
+ # FixPR logic typically runs on every push unless skipped by other means.
1596
+ # But for now, let's assume we don't want to loop infinitely on the same commit.
1597
+ if head_sha and self._should_skip_pr(repo_name, branch_name, pr_number, head_sha):
1598
+ self.logger.info(
1599
+ "⏭️ Skipping PR #%s - already processed commit %s",
1600
+ pr_number,
1601
+ head_sha[:8],
1602
+ )
1603
+ return "skipped"
1604
+
1605
+ # Dispatch agent for fixpr
1606
+ try:
1607
+ base_dir = ensure_base_clone(repo_full)
1608
+ with chdir(base_dir):
1609
+ dispatcher = TaskDispatcher()
1610
+ # Prepare PR dict for dispatch_agent_for_pr
1611
+ pr_info = {
1612
+ "repo_full": repo_full,
1613
+ "repo": repo_name,
1614
+ "number": pr_number,
1615
+ "branch": branch_name,
1616
+ }
1617
+ success = dispatch_agent_for_pr(dispatcher, pr_info, agent_cli=agent_cli, model=model)
1618
+
1619
+ if success:
1620
+ queued_posted = self._post_fixpr_queued(repo_full, pr_number, pr_data, head_sha, agent_cli=agent_cli)
1621
+ # Record processing so we don't loop
1622
+ if head_sha:
1623
+ self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
1624
+
1625
+ if not queued_posted:
1626
+ self.logger.warning(
1627
+ "⚠️ Queued comment failed for PR #%s, but agent is dispatched",
1628
+ pr_number,
1629
+ )
1630
+ return "partial"
1631
+
1632
+ return "posted" # "posted" means action taken
1633
+ else:
1634
+ return "failed"
1635
+ except Exception as exc:
1636
+ self.logger.error(
1637
+ "❌ Failed to dispatch fixpr agent for %s #%s: %s",
1638
+ repo_full,
1639
+ pr_number,
1640
+ exc,
1641
+ )
1642
+ return "failed"
1643
+
1644
+ def _get_pr_comment_state(self, repo_full_name: str, pr_number: int) -> Tuple[Optional[str], List[Dict]]:
1645
+ """Fetch PR comment data needed for Codex comment gating"""
1646
+ view_cmd = [
1647
+ "gh",
1648
+ "pr",
1649
+ "view",
1650
+ str(pr_number),
1651
+ "--repo",
1652
+ repo_full_name,
1653
+ "--json",
1654
+ "headRefOid,comments",
1655
+ ]
1656
+
1657
+ try:
1658
+ result = AutomationUtils.execute_subprocess_with_timeout(
1659
+ view_cmd,
1660
+ timeout=30
1661
+ )
1662
+ pr_data = json.loads(result.stdout or "{}")
1663
+ head_sha = pr_data.get("headRefOid")
1664
+
1665
+ # Handle different comment structures from GitHub API
1666
+ comments_data = pr_data.get("comments", [])
1667
+ if isinstance(comments_data, dict):
1668
+ comments = comments_data.get("nodes", [])
1669
+ elif isinstance(comments_data, list):
1670
+ comments = comments_data
1671
+ else:
1672
+ comments = []
1673
+
1674
+ # Ensure comments are sorted by creation time (oldest first)
1675
+ # GitHub API should return them sorted, but let's be explicit
1676
+ comments.sort(
1677
+ key=lambda c: (c.get("createdAt") or c.get("updatedAt") or "")
1678
+ )
1679
+
1680
+ return head_sha, comments
1681
+ except subprocess.CalledProcessError as e:
1682
+ error_message = e.stderr.strip() if e.stderr else str(e)
1683
+ self.logger.warning(
1684
+ f"⚠️ Failed to fetch PR comment state for PR #{pr_number}: {error_message}"
1685
+ )
1686
+ except json.JSONDecodeError as e:
1687
+ self.logger.warning(
1688
+ f"⚠️ Failed to parse PR comment state for PR #{pr_number}: {e}"
1689
+ )
1690
+
1691
+ return None, []
1692
+
1693
+ def _get_head_commit_details(
1694
+ self,
1695
+ repo_full_name: str,
1696
+ pr_number: int,
1697
+ expected_sha: Optional[str] = None,
1698
+ ) -> Optional[Dict[str, Optional[str]]]:
1699
+ """Fetch metadata for the PR head commit using the GitHub GraphQL API."""
1700
+
1701
+ if "/" not in repo_full_name:
1702
+ self.logger.debug(
1703
+ "⚠️ Cannot fetch commit details for %s - invalid repo format",
1704
+ repo_full_name,
1705
+ )
1706
+ return None
1707
+
1708
+ owner, name = repo_full_name.split("/", 1)
1709
+
1710
+ # Validate GitHub naming constraints (alphanumeric, hyphens, periods, underscores, max 100 chars)
1711
+ github_name_pattern = re.compile(r"^[a-zA-Z0-9]([a-zA-Z0-9\-\._]{0,98}[a-zA-Z0-9])?$")
1712
+ if not github_name_pattern.match(owner) or not github_name_pattern.match(name):
1713
+ self.logger.warning(
1714
+ "⚠️ Invalid GitHub identifiers: owner='%s', name='%s'",
1715
+ owner,
1716
+ name,
1717
+ )
1718
+ return None
1719
+
1720
+ # Validate PR number is positive integer
1721
+ if not isinstance(pr_number, int) or pr_number <= 0:
1722
+ self.logger.warning("⚠️ Invalid PR number: %s", pr_number)
1723
+ return None
1724
+
1725
+ cmd = [
1726
+ "gh",
1727
+ "api",
1728
+ "graphql",
1729
+ "-f",
1730
+ f"query={self._HEAD_COMMIT_DETAILS_QUERY}",
1731
+ "-f",
1732
+ f"owner={owner}",
1733
+ "-f",
1734
+ f"name={name}",
1735
+ "-F",
1736
+ f"prNumber={pr_number}",
1737
+ ]
1738
+
1739
+ try:
1740
+ result = AutomationUtils.execute_subprocess_with_timeout(cmd, timeout=30)
1741
+ except subprocess.CalledProcessError as exc:
1742
+ self.logger.debug(
1743
+ "⚠️ Failed to fetch head commit details for %s#%s: %s",
1744
+ repo_full_name,
1745
+ pr_number,
1746
+ exc.stderr or exc,
1747
+ )
1748
+ return None
1749
+ except Exception as exc:
1750
+ self.logger.debug(
1751
+ "⚠️ Error executing head commit lookup for %s#%s: %s",
1752
+ repo_full_name,
1753
+ pr_number,
1754
+ exc,
1755
+ )
1756
+ return None
1757
+
1758
+ try:
1759
+ data = json.loads(result.stdout or "{}")
1760
+ except json.JSONDecodeError as exc:
1761
+ self.logger.debug(
1762
+ "⚠️ Failed to decode commit details for %s#%s: %s",
1763
+ repo_full_name,
821
1764
  pr_number,
822
1765
  exc,
823
1766
  )
@@ -886,6 +1829,32 @@ Use your judgment to fix comments from everyone or explain why it should not be
886
1829
 
887
1830
  return comment_body[start_index:end_index].strip()
888
1831
 
1832
+ def _extract_fix_comment_marker(self, comment_body: str) -> Optional[str]:
1833
+ """Extract commit SHA from fix-comment automation comments.
1834
+
1835
+ Handles both old format (SHA) and new format (SHA:cli).
1836
+ Returns just the SHA portion for comparison.
1837
+ """
1838
+ if not comment_body:
1839
+ return None
1840
+
1841
+ prefix_index = comment_body.find(self.FIX_COMMENT_MARKER_PREFIX)
1842
+ if prefix_index == -1:
1843
+ return None
1844
+
1845
+ start_index = prefix_index + len(self.FIX_COMMENT_MARKER_PREFIX)
1846
+ end_index = comment_body.find(self.FIX_COMMENT_MARKER_SUFFIX, start_index)
1847
+ if end_index == -1:
1848
+ return None
1849
+
1850
+ marker_content = comment_body[start_index:end_index].strip()
1851
+ # Handle new format: SHA:cli -> extract just SHA
1852
+ # Also handles old format: SHA (no colon)
1853
+ if ":" in marker_content:
1854
+ marker_content = marker_content.split(":")[0]
1855
+
1856
+ return marker_content
1857
+
889
1858
  def _has_codex_comment_for_commit(self, comments: List[Dict], head_sha: str) -> bool:
890
1859
  """Determine if Codex instruction already exists for the latest commit"""
891
1860
  if not head_sha:
@@ -899,6 +1868,19 @@ Use your judgment to fix comments from everyone or explain why it should not be
899
1868
 
900
1869
  return False
901
1870
 
1871
+ def _has_fix_comment_comment_for_commit(self, comments: List[Dict], head_sha: str) -> bool:
1872
+ """Determine if fix-comment automation already ran for the latest commit."""
1873
+ if not head_sha:
1874
+ return False
1875
+
1876
+ for comment in comments:
1877
+ body = comment.get("body", "")
1878
+ marker_sha = self._extract_fix_comment_marker(body)
1879
+ if marker_sha and marker_sha == head_sha and self.FIX_COMMENT_COMPLETION_MARKER in body:
1880
+ return True
1881
+
1882
+ return False
1883
+
902
1884
  def _is_head_commit_from_codex(
903
1885
  self, commit_details: Optional[Dict[str, Optional[str]]]
904
1886
  ) -> bool:
@@ -933,6 +1915,172 @@ Use your judgment to fix comments from everyone or explain why it should not be
933
1915
 
934
1916
  return False
935
1917
 
1918
+ def _is_github_bot_comment(self, comment: Dict) -> bool:
1919
+ """Check if comment is from a GitHub bot (not Codex/AI automation).
1920
+
1921
+ Detection order matters:
1922
+ 1. Check KNOWN_GITHUB_BOTS first (these are review bots we want to detect)
1923
+ 2. Then check [bot] suffix for other bots
1924
+ 3. Only exclude codex patterns for bots NOT in our known list
1925
+ """
1926
+ author_login = self._get_comment_author_login(comment)
1927
+ if not author_login:
1928
+ return False
1929
+
1930
+ lower_login = author_login.lower()
1931
+
1932
+ # Strip [bot] suffix for known bot comparison (handles both "coderabbitai" and "coderabbitai[bot]")
1933
+ base_login = lower_login.removesuffix("[bot]")
1934
+
1935
+ # Check known review bots FIRST (before codex pattern exclusion)
1936
+ # These are legitimate review bots whose comments should trigger re-processing
1937
+ if base_login in self.KNOWN_GITHUB_BOTS:
1938
+ return True
1939
+
1940
+ # GitHub bots have [bot] suffix - but exclude our own automation bots
1941
+ # Use case-insensitive check for robustness
1942
+ if lower_login.endswith("[bot]"):
1943
+ # Exclude our own Codex/AI automation bots (chatgpt-codex-connector[bot], etc.)
1944
+ for pattern in self._codex_actor_patterns:
1945
+ if pattern.search(lower_login):
1946
+ return False
1947
+ return True
1948
+
1949
+ return False
1950
+
1951
+ def _get_last_codex_automation_comment_time(self, comments: List[Dict]) -> Optional[str]:
1952
+ """Find the timestamp of the last automation comment (any workflow marker).
1953
+
1954
+ Note: Despite the method name, this checks ALL automation workflow markers
1955
+ (codex, fix-comment, fixpr) to prevent rerun gating misfires with multiple workflows.
1956
+ """
1957
+ last_time = None
1958
+
1959
+ for comment in comments:
1960
+ body = comment.get("body", "")
1961
+ # Check if this is ANY automation comment (any workflow marker)
1962
+ if (
1963
+ self.CODEX_COMMIT_MARKER_PREFIX in body
1964
+ or self.FIX_COMMENT_MARKER_PREFIX in body
1965
+ or self.FIX_COMMENT_RUN_MARKER_PREFIX in body
1966
+ or self.FIXPR_MARKER_PREFIX in body
1967
+ ):
1968
+ created_at = comment.get("createdAt") or comment.get("updatedAt")
1969
+ if created_at and (last_time is None or created_at > last_time):
1970
+ last_time = created_at
1971
+
1972
+ return last_time
1973
+
1974
+ def _count_codex_automation_comments(self, comments: List[Dict]) -> int:
1975
+ """Count the number of Codex automation comments (with commit marker).
1976
+
1977
+ This is used for safety limits - we only count comments that contain
1978
+ the CODEX_COMMIT_MARKER_PREFIX, not all comments from jleechan2015.
1979
+ """
1980
+ count = 0
1981
+ for comment in comments:
1982
+ body = comment.get("body", "")
1983
+ if self.CODEX_COMMIT_MARKER_PREFIX in body or self.FIX_COMMENT_MARKER_PREFIX in body:
1984
+ count += 1
1985
+ return count
1986
+
1987
+ def _count_workflow_comments(self, comments: List[Dict], workflow_type: str) -> int:
1988
+ """Count automation comments for a specific workflow type.
1989
+
1990
+ Args:
1991
+ comments: List of PR comments
1992
+ workflow_type: One of 'pr_automation', 'fix_comment', 'codex_update', 'fixpr'
1993
+
1994
+ Returns:
1995
+ Count of comments matching the workflow type
1996
+
1997
+ Note: codex_update workflow operates via browser automation (not PR comments),
1998
+ so count is always 0. The limit is configured but unused, reserved for future
1999
+ compatibility if codex_update ever posts PR comments.
2000
+ """
2001
+ # codex_update doesn't post PR comments, so always returns 0
2002
+ # Limit is configured but unused (reserved for future compatibility)
2003
+ if workflow_type == "codex_update":
2004
+ return 0
2005
+
2006
+ count = 0
2007
+ for comment in comments:
2008
+ body = comment.get("body", "")
2009
+
2010
+ if workflow_type == "pr_automation":
2011
+ # PR automation uses codex-automation-commit marker (but not fix-comment or fixpr)
2012
+ if (
2013
+ self.CODEX_COMMIT_MARKER_PREFIX in body
2014
+ and self.FIX_COMMENT_MARKER_PREFIX not in body
2015
+ and self.FIX_COMMENT_RUN_MARKER_PREFIX not in body
2016
+ and self.FIXPR_MARKER_PREFIX not in body
2017
+ ):
2018
+ count += 1
2019
+ elif workflow_type == "fix_comment":
2020
+ # Fix-comment workflow uses dedicated markers for queued runs + completion.
2021
+ # Only count if posted by the automation user, not bots echoing the marker
2022
+ if self.FIX_COMMENT_RUN_MARKER_PREFIX in body or self.FIX_COMMENT_MARKER_PREFIX in body:
2023
+ author = self._get_comment_author_login(comment)
2024
+ # Only count comments from the automation user
2025
+ # Exclude bot replies that might echo the marker in quoted text or scripts
2026
+ if author == self.automation_username:
2027
+ count += 1
2028
+ elif workflow_type == "fixpr":
2029
+ # FixPR workflow uses a dedicated marker in its queued comment.
2030
+ # Only count if posted by the automation user, not bots echoing the marker
2031
+ if self.FIXPR_MARKER_PREFIX in body:
2032
+ author = self._get_comment_author_login(comment)
2033
+ if author == self.automation_username:
2034
+ count += 1
2035
+ else:
2036
+ # Fallback: count all automation comments
2037
+ if (
2038
+ self.CODEX_COMMIT_MARKER_PREFIX in body
2039
+ or self.FIX_COMMENT_MARKER_PREFIX in body
2040
+ or self.FIX_COMMENT_RUN_MARKER_PREFIX in body
2041
+ or self.FIXPR_MARKER_PREFIX in body
2042
+ ):
2043
+ count += 1
2044
+ return count
2045
+
2046
+ def _has_new_bot_comments_since_codex(self, comments: List[Dict]) -> bool:
2047
+ """Check if there are new GitHub bot comments since the last Codex automation comment.
2048
+
2049
+ This allows automation to run even when head commit is from Codex if
2050
+ there are new bot comments (like CI failures, review bot comments) that
2051
+ need attention.
2052
+ """
2053
+ last_codex_time = self._get_last_codex_automation_comment_time(comments)
2054
+
2055
+ # If no Codex automation comment exists, treat any bot comment as new
2056
+ if not last_codex_time:
2057
+ for comment in comments:
2058
+ if self._is_github_bot_comment(comment):
2059
+ created_at = comment.get("createdAt") or comment.get("updatedAt")
2060
+ self.logger.debug(
2061
+ "🤖 Found bot comment from %s at %s with no prior Codex automation comment",
2062
+ self._get_comment_author_login(comment),
2063
+ created_at,
2064
+ )
2065
+ return True
2066
+ return False
2067
+
2068
+ for comment in comments:
2069
+ if not self._is_github_bot_comment(comment):
2070
+ continue
2071
+
2072
+ created_at = comment.get("createdAt") or comment.get("updatedAt")
2073
+ if created_at and created_at > last_codex_time:
2074
+ self.logger.debug(
2075
+ "🤖 Found new bot comment from %s at %s (after Codex comment at %s)",
2076
+ self._get_comment_author_login(comment),
2077
+ created_at,
2078
+ last_codex_time,
2079
+ )
2080
+ return True
2081
+
2082
+ return False
2083
+
936
2084
  def _get_comment_author_login(self, comment: Dict) -> str:
937
2085
  """Return normalized author login for a comment."""
938
2086
  author = comment.get("author") or comment.get("user") or {}
@@ -977,7 +2125,16 @@ Use your judgment to fix comments from everyone or explain why it should not be
977
2125
 
978
2126
  return False
979
2127
 
980
- def process_single_pr_by_number(self, pr_number: int, repository: str) -> bool:
2128
+ def process_single_pr_by_number(
2129
+ self,
2130
+ pr_number: int,
2131
+ repository: str,
2132
+ *,
2133
+ fix_comment: bool = False,
2134
+ fixpr: bool = False,
2135
+ agent_cli: str = "claude",
2136
+ model: str = None,
2137
+ ) -> bool:
981
2138
  """Process a specific PR by number and repository"""
982
2139
  repo_full = self._normalize_repository_name(repository)
983
2140
  self.logger.info(f"🎯 Processing target PR: {repo_full} #{pr_number}")
@@ -988,9 +2145,41 @@ Use your judgment to fix comments from everyone or explain why it should not be
988
2145
  return False
989
2146
 
990
2147
  try:
2148
+ # Check workflow-specific safety limits
2149
+ workflow_type = self._determine_workflow_type(fix_comment, fixpr)
2150
+ _, comments = self._get_pr_comment_state(repo_full, pr_number)
2151
+ automation_comment_count = self._count_workflow_comments(comments, workflow_type)
2152
+
2153
+ # Get workflow-specific limit
2154
+ if workflow_type == "fix_comment":
2155
+ workflow_limit = self.safety_manager.fix_comment_limit
2156
+ elif workflow_type == "fixpr":
2157
+ workflow_limit = self.safety_manager.fixpr_limit
2158
+ elif workflow_type == "pr_automation":
2159
+ workflow_limit = self.safety_manager.pr_automation_limit
2160
+ else:
2161
+ workflow_limit = self.safety_manager.pr_limit # Fallback
2162
+
2163
+ if automation_comment_count >= workflow_limit:
2164
+ self.logger.warning(
2165
+ f"🚫 Safety limits exceeded for PR {repo_full} #{pr_number} ({workflow_type}); "
2166
+ f"{automation_comment_count}/{workflow_limit} automation comments"
2167
+ )
2168
+ # Not an execution failure: we're intentionally skipping to avoid spamming.
2169
+ return True
2170
+
2171
+ if self.no_act:
2172
+ self.logger.info(
2173
+ "🧪 --no-act enabled: skipping processing for %s #%s (would run %s)",
2174
+ repo_full,
2175
+ pr_number,
2176
+ workflow_type,
2177
+ )
2178
+ return True
2179
+
991
2180
  # Check safety limits for this specific PR first
992
2181
  if not self.safety_manager.try_process_pr(pr_number, repo=repo_full):
993
- self.logger.warning(f"🚫 Safety limits exceeded for PR {repo_full} #{pr_number}")
2182
+ self.logger.warning(f"🚫 Internal safety limits exceeded for PR {repo_full} #{pr_number}")
994
2183
  return False
995
2184
 
996
2185
  # Only record global run AFTER confirming we can process the PR
@@ -1007,25 +2196,43 @@ Use your judgment to fix comments from everyone or explain why it should not be
1007
2196
  try:
1008
2197
  # Get PR details using gh CLI
1009
2198
  result = AutomationUtils.execute_subprocess_with_timeout(
1010
- ["gh", "pr", "view", str(pr_number), "--repo", repo_full, "--json", "title,headRefName,baseRefName,url,author"],
2199
+ ["gh", "pr", "view", str(pr_number), "--repo", repo_full, "--json", "title,headRefName,baseRefName,url,author,headRefOid,mergeable"],
1011
2200
  timeout=30
1012
2201
  )
1013
2202
  pr_data = json.loads(result.stdout)
1014
2203
 
1015
2204
  self.logger.info(f"📝 Found PR: {pr_data['title']}")
1016
2205
 
1017
- # Post codex instruction comment
1018
- comment_result = self.post_codex_instruction_simple(repo_full, pr_number, pr_data)
1019
- success = comment_result == "posted"
1020
-
1021
- # Record PR processing attempt with result
1022
- result = "success" if success else "failure"
1023
- self.safety_manager.record_pr_attempt(
1024
- pr_number,
1025
- result,
1026
- repo=repo_full,
1027
- branch=pr_data.get('headRefName'),
1028
- )
2206
+ if fix_comment:
2207
+ comment_result = self._process_pr_fix_comment(
2208
+ repo_full,
2209
+ pr_number,
2210
+ pr_data,
2211
+ agent_cli=agent_cli,
2212
+ model=model,
2213
+ )
2214
+ elif fixpr:
2215
+ comment_result = self._process_pr_fixpr(
2216
+ repo_full,
2217
+ pr_number,
2218
+ pr_data,
2219
+ agent_cli=agent_cli,
2220
+ model=model,
2221
+ )
2222
+ else:
2223
+ # Post codex instruction comment
2224
+ comment_result = self.post_codex_instruction_simple(repo_full, pr_number, pr_data)
2225
+ # Treat "skipped" as a neutral outcome: do not count it as failure,
2226
+ # and avoid recording an unbounded stream of skipped attempts.
2227
+ success = comment_result in {"posted", "skipped"}
2228
+ if comment_result != "skipped":
2229
+ result = "success" if comment_result == "posted" else "failure"
2230
+ self.safety_manager.record_pr_attempt(
2231
+ pr_number,
2232
+ result,
2233
+ repo=repo_full,
2234
+ branch=pr_data.get("headRefName"),
2235
+ )
1029
2236
 
1030
2237
  if success:
1031
2238
  self.logger.info(f"✅ Successfully processed target PR {repo_full} #{pr_number}")
@@ -1049,9 +2256,10 @@ Use your judgment to fix comments from everyone or explain why it should not be
1049
2256
  self.logger.debug("Traceback: %s", traceback.format_exc())
1050
2257
  return False
1051
2258
 
1052
- def run_monitoring_cycle(self, single_repo=None, max_prs=10):
2259
+ def run_monitoring_cycle(self, single_repo=None, max_prs=10, cutoff_hours: int = 24, fix_comment: bool = False, fixpr: bool = False, agent_cli: str = "claude", model: str = None):
1053
2260
  """Run a complete monitoring cycle with actionable PR counting"""
1054
- self.logger.info("🚀 Starting jleechanorg PR monitoring cycle")
2261
+ mode_label = "fix-comment" if fix_comment else ("fixpr" if fixpr else "comment")
2262
+ self.logger.info("🚀 Starting jleechanorg PR monitoring cycle (%s mode)", mode_label)
1055
2263
 
1056
2264
  if not self.safety_manager.can_start_global_run():
1057
2265
  current_runs = self.safety_manager.get_global_runs()
@@ -1066,7 +2274,7 @@ Use your judgment to fix comments from everyone or explain why it should not be
1066
2274
  global_run_recorded = self.wrapper_managed
1067
2275
 
1068
2276
  try:
1069
- open_prs = self.discover_open_prs()
2277
+ open_prs = self.discover_open_prs(cutoff_hours=cutoff_hours)
1070
2278
  except Exception as exc:
1071
2279
  self.logger.error("❌ Failed to discover PRs: %s", exc)
1072
2280
  self.logger.debug("Traceback: %s", traceback.format_exc())
@@ -1102,11 +2310,68 @@ Use your judgment to fix comments from everyone or explain why it should not be
1102
2310
  skipped_count += 1
1103
2311
  continue
1104
2312
 
1105
- branch_name = pr.get('headRefName', 'unknown')
2313
+ # For fixpr, perform additional eligibility check (conflicts or failing checks)
2314
+ if fixpr:
2315
+ # We need to fetch more details to know if it's eligible
2316
+ # This mirrors logic in list_actionable_prs/run_fixpr_batch
2317
+ try:
2318
+ # Check mergeable status first (lightweight if available in discover_open_prs? No, it's not)
2319
+ # We need to query it.
2320
+ # Let's assume we need to check failing checks too.
2321
+ # We can use has_failing_checks from orchestrated_pr_runner
2322
+ is_conflicting = False # We'd need to fetch mergeable status
2323
+
2324
+ # Fetch mergeable status
2325
+ result = AutomationUtils.execute_subprocess_with_timeout(
2326
+ ["gh", "pr", "view", str(pr_number), "--repo", repo_full_name, "--json", "mergeable"],
2327
+ timeout=30, check=False
2328
+ )
2329
+ if result.returncode == 0:
2330
+ data = json.loads(result.stdout)
2331
+ if data.get("mergeable") == "CONFLICTING":
2332
+ is_conflicting = True
2333
+
2334
+ is_failing = has_failing_checks(repo_full_name, pr_number)
2335
+
2336
+ if not (is_conflicting or is_failing):
2337
+ self.logger.debug(f"⏭️ Skipping PR #{pr_number} (fixpr) - no conflicts or failing checks")
2338
+ skipped_count += 1
2339
+ continue
2340
+
2341
+ except Exception as e:
2342
+ self.logger.warning(f"⚠️ Error checking fixpr eligibility for #{pr_number} ({type(e).__name__}): {e}")
2343
+ skipped_count += 1
2344
+ continue
2345
+
2346
+ branch_name = pr.get("headRefName", "unknown")
2347
+
2348
+ # Check automation comment count on GitHub (not internal attempts)
2349
+ # Determine workflow type based on mode
2350
+ workflow_type = self._determine_workflow_type(fix_comment, fixpr)
2351
+ _, comments = self._get_pr_comment_state(repo_full_name, pr_number)
2352
+ automation_comment_count = self._count_workflow_comments(comments, workflow_type)
2353
+
2354
+ # Get workflow-specific limit
2355
+ if workflow_type == "fix_comment":
2356
+ workflow_limit = self.safety_manager.fix_comment_limit
2357
+ elif workflow_type == "fixpr":
2358
+ workflow_limit = self.safety_manager.fixpr_limit
2359
+ elif workflow_type == "pr_automation":
2360
+ workflow_limit = self.safety_manager.pr_automation_limit
2361
+ else:
2362
+ workflow_limit = self.safety_manager.pr_limit # Fallback
2363
+
2364
+ if automation_comment_count >= workflow_limit:
2365
+ self.logger.info(
2366
+ f"🚫 Safety limits exceeded for PR {repo_full_name} #{pr_number} ({workflow_type}); "
2367
+ f"{automation_comment_count}/{workflow_limit} automation comments"
2368
+ )
2369
+ skipped_count += 1
2370
+ continue
1106
2371
 
1107
2372
  if not self.safety_manager.try_process_pr(pr_number, repo=repo_full_name, branch=branch_name):
1108
2373
  self.logger.info(
1109
- f"🚫 Safety limits exceeded for PR {repo_full_name} #{pr_number}; skipping"
2374
+ f"🚫 Internal safety limits exceeded for PR {repo_full_name} #{pr_number}; skipping"
1110
2375
  )
1111
2376
  skipped_count += 1
1112
2377
  continue
@@ -1125,24 +2390,56 @@ Use your judgment to fix comments from everyone or explain why it should not be
1125
2390
  self.safety_manager.global_limit,
1126
2391
  )
1127
2392
 
1128
- # Post codex instruction comment directly (comment-only approach)
1129
- comment_result = self.post_codex_instruction_simple(repo_full_name, pr_number, pr)
1130
- success = comment_result == "posted"
1131
-
1132
- result = "success" if success else "failure"
1133
- self.safety_manager.record_pr_attempt(
1134
- pr_number,
1135
- result,
1136
- repo=repo_full_name,
1137
- branch=branch_name,
1138
- )
1139
- attempt_recorded = True
2393
+ if fix_comment:
2394
+ comment_result = self._process_pr_fix_comment(
2395
+ repo_full_name,
2396
+ pr_number,
2397
+ pr,
2398
+ agent_cli=agent_cli,
2399
+ model=model,
2400
+ )
2401
+ elif fixpr:
2402
+ comment_result = self._process_pr_fixpr(
2403
+ repo_full_name,
2404
+ pr_number,
2405
+ pr,
2406
+ agent_cli=agent_cli,
2407
+ model=model,
2408
+ )
2409
+ else:
2410
+ # Post codex instruction comment directly (comment-only approach)
2411
+ comment_result = self.post_codex_instruction_simple(repo_full_name, pr_number, pr)
2412
+
2413
+ # Treat "skipped" as a neutral outcome: do not count it as failure,
2414
+ # and avoid recording an unbounded stream of skipped attempts.
2415
+ success = comment_result in {"posted", "skipped"}
2416
+ if comment_result != "skipped":
2417
+ result = "success" if comment_result == "posted" else "failure"
2418
+ self.safety_manager.record_pr_attempt(
2419
+ pr_number,
2420
+ result,
2421
+ repo=repo_full_name,
2422
+ branch=branch_name,
2423
+ )
2424
+ attempt_recorded = True
1140
2425
 
1141
2426
  if success:
1142
- self.logger.info(f"✅ Successfully processed PR {repo_full_name} #{pr_number}")
1143
- actionable_processed += 1
2427
+ # Only count as processed when we actually posted; skips should not inflate stats.
2428
+ if comment_result == "posted":
2429
+ actionable_processed += 1
2430
+ self.logger.info(
2431
+ "✅ Successfully processed PR %s #%s (result=%s)",
2432
+ repo_full_name,
2433
+ pr_number,
2434
+ comment_result,
2435
+ )
1144
2436
  else:
1145
- self.logger.error(f"❌ Failed to process PR {repo_full_name} #{pr_number}")
2437
+ self.logger.error(
2438
+ "❌ Failed to process PR %s #%s (result=%s)",
2439
+ repo_full_name,
2440
+ pr_number,
2441
+ comment_result,
2442
+ )
1146
2443
  except Exception as e:
1147
2444
  self.logger.error(f"❌ Exception processing PR {repo_full_name} #{pr_number}: {e}")
1148
2445
  self.logger.debug("Traceback: %s", traceback.format_exc())
@@ -1157,30 +2454,436 @@ Use your judgment to fix comments from everyone or explain why it should not be
1157
2454
  self.logger.info(f"🏁 Monitoring cycle complete: {actionable_processed} actionable PRs processed, {skipped_count} skipped")
1158
2455
 
1159
2456
 
2457
def check_chrome_cdp_accessible(port=9222, host="127.0.0.1", timeout=5):
    """
    Validate that Chrome DevTools Protocol is accessible.

    Probes ``http://<host>:<port>/json/version`` and reports the browser
    version on success.

    Args:
        port: CDP port (default 9222)
        host: CDP host (default 127.0.0.1)
        timeout: Connection timeout in seconds

    Returns:
        tuple: (bool, str) - (success, message)
    """
    # Bare IPv6 literals need brackets inside a URL authority.
    if ":" in host and not (host.startswith("[") and host.endswith("]")):
        url_host = f"[{host}]"
    else:
        url_host = host
    probe_url = f"http://{url_host}:{port}/json/version"

    try:
        request = urllib.request.Request(probe_url, headers={"User-Agent": "Mozilla/5.0"})
        with urllib.request.urlopen(request, timeout=timeout) as response:
            payload = json.loads(response.read().decode())
        version = payload.get("Browser", "Unknown")
        return True, f"✅ Chrome CDP accessible (version: {version})"
    except urllib.error.URLError as e:
        return False, f"❌ Chrome CDP not accessible at {host}:{port} - {e.reason}"
    except Exception as e:
        return False, f"❌ Failed to connect to Chrome CDP: {e}"
2481
+
2482
+
2483
+ def _parse_bool_env(name: str, default: bool = True) -> bool:
2484
+ raw = os.environ.get(name)
2485
+ if raw is None:
2486
+ return default
2487
+ raw = raw.strip()
2488
+ if not raw:
2489
+ return default
2490
+ return raw.lower() not in {"0", "false", "no", "off"}
2491
+
2492
+
2493
+ def _validate_cdp_host(raw_host: str) -> str:
2494
+ allowed_hosts = {"127.0.0.1", "localhost", "::1"}
2495
+ host = (raw_host or "").strip()
2496
+ if host in allowed_hosts:
2497
+ return host
2498
+
2499
+ print(
2500
+ f"WARNING: Ignoring unsafe CODEX_CDP_HOST value {host!r}; "
2501
+ "only localhost/127.0.0.1/::1 are allowed. Falling back to 127.0.0.1.",
2502
+ file=sys.stderr,
2503
+ )
2504
+ return "127.0.0.1"
2505
+
2506
+
2507
+ def _format_cdp_host_for_url(host: str) -> str:
2508
+ if ":" in host and not (host.startswith("[") and host.endswith("]")):
2509
+ return f"[{host}]"
2510
+ return host
2511
+
2512
+
2513
def _resolve_cdp_host_port() -> Tuple[str, int]:
    """Read CODEX_CDP_HOST / CODEX_CDP_PORT from the environment.

    The host is forced to a loopback address and the port must parse as
    a valid TCP port (1-65535); anything else falls back to 9222.
    """
    host = _validate_cdp_host(os.environ.get("CODEX_CDP_HOST", "127.0.0.1"))
    try:
        port = int(os.environ.get("CODEX_CDP_PORT", "9222"))
        if not (1 <= port <= 65535):
            raise ValueError(f"Port {port} out of range")
    except ValueError:
        port = 9222
    return host, port
2524
+
2525
+
2526
+ def _detect_chrome_binary() -> Optional[str]:
2527
+ if sys.platform == "win32":
2528
+ win_candidates = [
2529
+ Path(os.environ.get("PROGRAMFILES", "C:\\Program Files"))
2530
+ / "Google/Chrome/Application/chrome.exe",
2531
+ Path(os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)"))
2532
+ / "Google/Chrome/Application/chrome.exe",
2533
+ ]
2534
+ for candidate in win_candidates:
2535
+ if candidate.exists():
2536
+ return str(candidate)
2537
+
2538
+ if sys.platform == "darwin":
2539
+ mac_candidates = [
2540
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
2541
+ "/Applications/Chromium.app/Contents/MacOS/Chromium",
2542
+ ]
2543
+ for candidate in mac_candidates:
2544
+ if Path(candidate).exists():
2545
+ return candidate
2546
+
2547
+ for command in ("google-chrome", "google-chrome-stable", "chromium", "chromium-browser"):
2548
+ found = shutil.which(command)
2549
+ if found:
2550
+ return found
2551
+ return None
2552
+
2553
+
2554
def _start_chrome_debug(port: int, user_data_dir: str) -> Tuple[bool, str]:
    """Launch a CDP-enabled Chrome, via CODEX_CDP_START_SCRIPT or directly.

    When CODEX_CDP_START_SCRIPT is set it is parsed shell-style, resolved
    to an existing file, and invoked with the port appended as the final
    argument. Otherwise a detected Chrome/Chromium binary is started with
    a user-data dir that must live under the caller's home directory.

    Returns:
        (started, message) where message describes success or the failure.
    """
    script_env = os.environ.get("CODEX_CDP_START_SCRIPT")
    if script_env:
        try:
            argv = shlex.split(script_env)
        except ValueError as exc:
            return False, f"❌ Invalid CODEX_CDP_START_SCRIPT value ({script_env}): {exc}"
        if not argv:
            return False, "❌ CODEX_CDP_START_SCRIPT is set but empty after parsing"

        script = Path(argv[0]).expanduser()
        if not script.is_file():
            return False, f"❌ CODEX_CDP_START_SCRIPT target does not exist or is not a file: {script}"
        try:
            resolved_script = script.resolve()
        except OSError as exc:
            return False, f"❌ Failed to resolve CODEX_CDP_START_SCRIPT path ({script}): {exc}"

        # Hand the CDP port to the script as its final argument.
        argv[0] = str(resolved_script)
        argv.append(str(port))
        try:
            subprocess.Popen(
                argv,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                start_new_session=True,
            )
        except Exception as exc:
            return False, f"❌ Failed to run CODEX_CDP_START_SCRIPT ({resolved_script}): {exc}"
        return True, f"🚀 Started Chrome via script {resolved_script} on port {port}"

    browser = _detect_chrome_binary()
    if not browser:
        return False, "❌ Could not find Chrome or Chromium binary"

    profile_dir = Path(user_data_dir).expanduser()
    if profile_dir.is_absolute():
        profile_dir = profile_dir.resolve()
    else:
        # Relative paths are anchored at $HOME rather than the CWD.
        profile_dir = (Path.home() / profile_dir).resolve()
    try:
        # Security guard: the profile must stay inside the home directory.
        profile_dir.relative_to(Path.home().resolve())
    except ValueError:
        return False, (
            "❌ CODEX_CDP_USER_DATA_DIR must reside under your home directory; "
            f"got {profile_dir}"
        )
    profile_dir.mkdir(parents=True, exist_ok=True)

    launch = [
        browser,
        f"--remote-debugging-port={port}",
        f"--user-data-dir={profile_dir}",
        "--window-size=1920,1080",
        "https://chatgpt.com/",
    ]
    try:
        subprocess.Popen(
            launch,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            start_new_session=True,
        )
    except Exception as exc:
        return False, f"❌ Failed to start Chrome with CDP: {exc}"
    return True, f"🚀 Started Chrome with CDP on port {port}"
2620
+
2621
+
2622
def ensure_chrome_cdp_accessible(timeout: Optional[int] = None) -> Tuple[bool, str]:
    """Check Chrome CDP reachability, auto-starting Chrome when allowed.

    The effective timeout comes from the argument or the
    CODEX_CDP_START_TIMEOUT env var, coerced to a positive int (default
    20s). If the endpoint is down and CODEX_CDP_AUTO_START is not
    disabled, Chrome is launched and polled until the deadline.

    Returns:
        (ok, message) describing the final reachability state.
    """
    host, port = _resolve_cdp_host_port()

    # Resolve the timeout: env fallback first, then defensive coercion
    # (callers may pass a non-int), finally clamp non-positive values.
    if timeout is None:
        try:
            timeout = int(os.environ.get("CODEX_CDP_START_TIMEOUT", "20"))
        except ValueError:
            timeout = 20
    try:
        timeout = int(timeout)
    except (TypeError, ValueError):
        timeout = 20
    if timeout <= 0:
        timeout = 20

    reachable, message = check_chrome_cdp_accessible(port=port, host=host)
    if reachable:
        return True, message

    if not _parse_bool_env("CODEX_CDP_AUTO_START", default=True):
        return False, message

    profile = os.environ.get(
        "CODEX_CDP_USER_DATA_DIR", str(Path.home() / ".chrome-automation-profile")
    )
    started, start_message = _start_chrome_debug(port, profile)
    if not started:
        return False, start_message

    # Poll until the deadline, capping each probe (and each sleep) at 1s.
    deadline = time.time() + timeout
    last_message = message
    while (remaining := deadline - time.time()) > 0:
        reachable, last_message = check_chrome_cdp_accessible(
            port=port,
            host=host,
            timeout=min(1.0, remaining),
        )
        if reachable:
            return True, f"{start_message}\n{last_message}"
        remaining = deadline - time.time()
        if remaining <= 0:
            break
        time.sleep(min(1.0, remaining))

    return False, f"{start_message}\n❌ Chrome CDP still not reachable after {timeout}s ({last_message})"
2669
+
2670
+
1160
2671
  def main():
1161
2672
  """CLI interface for jleechanorg PR monitor"""
1162
2673
 
1163
- parser = argparse.ArgumentParser(description='jleechanorg PR Monitor')
1164
- parser.add_argument('--dry-run', action='store_true',
1165
- help='Discover PRs but do not process them')
1166
- parser.add_argument('--single-repo',
1167
- help='Process only specific repository')
1168
- parser.add_argument('--max-prs', type=int, default=5,
1169
- help='Maximum PRs to process per cycle')
1170
- parser.add_argument('--target-pr', type=int,
1171
- help='Process specific PR number')
1172
- parser.add_argument('--target-repo',
1173
- help='Repository for target PR (required with --target-pr)')
2674
+ parser = argparse.ArgumentParser(description="jleechanorg PR Monitor")
2675
+ parser.add_argument(
2676
+ "--no-act",
2677
+ action="store_true",
2678
+ help="Do not post comments or dispatch agents (useful for evidence/testing).",
2679
+ )
2680
+ parser.add_argument("--dry-run", action="store_true",
2681
+ help="Discover PRs but do not process them")
2682
+ parser.add_argument("--fixpr", action="store_true",
2683
+ help="Run /fixpr-only orchestrated flow for conflicts/failing checks (skips drafts)")
2684
+ parser.add_argument("--fix-comment", action="store_true",
2685
+ help="Run fix-comment orchestration flow to resolve PR review comments")
2686
+ parser.add_argument("--fix-comment-watch", action="store_true",
2687
+ help="Watch a PR for automation commits and post review request when detected")
2688
+ parser.add_argument("--cutoff-hours", type=int, default=24,
2689
+ help="Look-back window in hours for PR updates (default: 24)")
2690
+ parser.add_argument("--single-repo",
2691
+ help="Process only specific repository")
2692
+ parser.add_argument("--max-prs", type=int, default=5,
2693
+ help="Maximum PRs to process per cycle")
2694
+ parser.add_argument("--target-pr", type=int,
2695
+ help="Process specific PR number")
2696
+ parser.add_argument("--target-repo",
2697
+ help="Repository for target PR (required with --target-pr)")
2698
+ parser.add_argument(
2699
+ "--fixpr-agent",
2700
+ type=_parse_fixpr_agent_chain,
2701
+ default="claude",
2702
+ help="AI CLI (or comma-separated chain) for --fixpr mode (default: claude). Example: gemini,codex",
2703
+ )
2704
+ parser.add_argument(
2705
+ "--model",
2706
+ type=str,
2707
+ default=None,
2708
+ help="Model to use for agent CLI. Examples: sonnet/opus/haiku (Claude), gemini-3-pro-preview/gemini-3-auto (Gemini), composer-1 (Cursor). If not specified, CLI-specific defaults are used.",
2709
+ )
2710
+ parser.add_argument("--list-eligible", action="store_true",
2711
+ help="Dry-run listing of PRs eligible for fixpr (conflicts/failing checks)")
2712
+ parser.add_argument("--codex-update", action="store_true",
2713
+ help="Run Codex automation to update tasks via browser automation (use --codex-task-limit; default: 50)")
2714
+ parser.add_argument(
2715
+ "--codex-task-limit",
2716
+ type=_positive_int_arg,
2717
+ default=50,
2718
+ help="Task limit for --codex-update (default: 50, max: 200).",
2719
+ )
2720
+
2721
+ # Safety limits (params; no environment variables).
2722
+ parser.add_argument("--pr-limit", type=_positive_int_arg, default=None, help="Max failed attempts per PR (default: 10).")
2723
+ parser.add_argument("--global-limit", type=_positive_int_arg, default=None, help="Max global runs per day (default: 50).")
2724
+ parser.add_argument("--approval-hours", type=_positive_int_arg, default=None, help="Manual approval validity in hours (default: 24).")
2725
+ parser.add_argument("--subprocess-timeout", type=_positive_int_arg, default=None, help="Default subprocess timeout seconds (default: 300).")
2726
+ parser.add_argument("--pr-automation-limit", type=_positive_int_arg, default=None, help="Max PR automation comments per PR (default: 10).")
2727
+ parser.add_argument("--fix-comment-limit", type=_positive_int_arg, default=None, help="Max fix-comment comments per PR (default: 10).")
2728
+ parser.add_argument("--fixpr-limit", type=_positive_int_arg, default=None, help="Max fixpr comments per PR (default: 10).")
2729
+ parser.add_argument("--automation-user", help="Override automation username for marker validation")
1174
2730
 
1175
2731
  args = parser.parse_args()
1176
2732
 
2733
+ try:
2734
+ args.model = _normalize_model(args.model)
2735
+ except argparse.ArgumentTypeError as exc:
2736
+ parser.error(str(exc))
2737
+
1177
2738
  # Validate target PR arguments
1178
2739
  if args.target_pr and not args.target_repo:
1179
- parser.error('--target-repo is required when using --target-pr')
2740
+ parser.error("--target-repo is required when using --target-pr")
1180
2741
  if args.target_repo and not args.target_pr:
1181
- parser.error('--target-pr is required when using --target-repo')
2742
+ parser.error("--target-pr is required when using --target-repo")
2743
+ if args.fixpr and args.fix_comment:
2744
+ parser.error("--fixpr and --fix-comment are mutually exclusive")
2745
+ if args.fix_comment_watch and not (args.target_pr and args.target_repo):
2746
+ parser.error("--fix-comment-watch requires --target-pr and --target-repo")
2747
+
2748
+ safety_limits: Dict[str, int] = {}
2749
+ if args.pr_limit is not None:
2750
+ safety_limits["pr_limit"] = args.pr_limit
2751
+ if args.global_limit is not None:
2752
+ safety_limits["global_limit"] = args.global_limit
2753
+ if args.approval_hours is not None:
2754
+ safety_limits["approval_hours"] = args.approval_hours
2755
+ if args.subprocess_timeout is not None:
2756
+ safety_limits["subprocess_timeout"] = args.subprocess_timeout
2757
+ if args.pr_automation_limit is not None:
2758
+ safety_limits["pr_automation_limit"] = args.pr_automation_limit
2759
+ if args.fix_comment_limit is not None:
2760
+ safety_limits["fix_comment_limit"] = args.fix_comment_limit
2761
+ if args.fixpr_limit is not None:
2762
+ safety_limits["fixpr_limit"] = args.fixpr_limit
2763
+
2764
+ monitor = JleechanorgPRMonitor(
2765
+ safety_limits=safety_limits or None,
2766
+ no_act=args.no_act,
2767
+ automation_username=getattr(args, 'automation_user', None)
2768
+ )
1182
2769
 
1183
- monitor = JleechanorgPRMonitor()
2770
+ if args.fix_comment_watch:
2771
+ success = monitor.run_fix_comment_review_watcher(
2772
+ args.target_pr,
2773
+ args.target_repo,
2774
+ agent_cli=args.fixpr_agent,
2775
+ )
2776
+ sys.exit(0 if success else 1)
2777
+
2778
+ if args.codex_update:
2779
+ if args.no_act:
2780
+ print("🧪 --no-act enabled: skipping codex-update run")
2781
+ sys.exit(0)
2782
+
2783
+ task_limit = min(args.codex_task_limit, 200)
2784
+
2785
+ print(f"🤖 Running Codex automation (first {task_limit} tasks)...")
2786
+
2787
+ # Validate Chrome CDP is accessible before running (auto-starts if needed)
2788
+ cdp_ok, cdp_msg = ensure_chrome_cdp_accessible()
2789
+ print(cdp_msg)
2790
+ if not cdp_ok:
2791
+ print("\n⚠️ Skipping Codex automation (Chrome CDP unavailable).")
2792
+ print("💡 TIP: Start Chrome with CDP enabled first:")
2793
+ print(" ./automation/jleechanorg_pr_automation/openai_automation/start_chrome_debug.sh")
2794
+ print(" Or set CODEX_CDP_START_SCRIPT to a custom launcher path.")
2795
+ sys.exit(0)
2796
+
2797
+ try:
2798
+ host, port = _resolve_cdp_host_port()
2799
+ # Call the codex automation module with limit
2800
+ # Use -m to run as module (works with installed package)
2801
+ # Requires Chrome with CDP enabled on port 9222
2802
+ timeout_seconds = max(300, int(task_limit * 12)) # ~12s/task, min 5 minutes
2803
+ result = subprocess.run(
2804
+ [
2805
+ "python3",
2806
+ "-m",
2807
+ "jleechanorg_pr_automation.openai_automation.codex_github_mentions",
2808
+ "--use-existing-browser",
2809
+ "--cdp-host",
2810
+ host,
2811
+ "--cdp-port",
2812
+ str(port),
2813
+ "--limit",
2814
+ str(task_limit),
2815
+ ],
2816
+ check=False, capture_output=True,
2817
+ text=True,
2818
+ timeout=timeout_seconds
2819
+ )
2820
+ print(result.stdout)
2821
+ if result.stderr:
2822
+ print(result.stderr, file=sys.stderr)
2823
+ sys.exit(result.returncode)
2824
+ except subprocess.TimeoutExpired:
2825
+ print(f"❌ Codex automation timed out after {timeout_seconds // 60} minutes")
2826
+ sys.exit(1)
2827
+ except Exception as e:
2828
+ print(f"❌ Failed to run Codex automation: {e}")
2829
+ sys.exit(1)
2830
+
2831
+ if args.fixpr:
2832
+ if args.target_pr and args.target_repo:
2833
+ print(f"🎯 Processing target PR (fixpr): {args.target_repo} #{args.target_pr}")
2834
+ success = monitor.process_single_pr_by_number(
2835
+ args.target_pr,
2836
+ args.target_repo,
2837
+ fixpr=True,
2838
+ agent_cli=args.fixpr_agent,
2839
+ model=args.model,
2840
+ )
2841
+ sys.exit(0 if success else 1)
2842
+
2843
+ monitor.run_monitoring_cycle(
2844
+ single_repo=args.single_repo,
2845
+ max_prs=args.max_prs,
2846
+ cutoff_hours=args.cutoff_hours,
2847
+ fixpr=True,
2848
+ agent_cli=args.fixpr_agent,
2849
+ model=args.model,
2850
+ )
2851
+ return
2852
+
2853
+ if args.fix_comment:
2854
+ # Handle target PR processing
2855
+ if args.target_pr and args.target_repo:
2856
+ print(f"🎯 Processing target PR (fix-comment): {args.target_repo} #{args.target_pr}")
2857
+ success = monitor.process_single_pr_by_number(
2858
+ args.target_pr,
2859
+ args.target_repo,
2860
+ fix_comment=True,
2861
+ agent_cli=args.fixpr_agent,
2862
+ model=args.model,
2863
+ )
2864
+ sys.exit(0 if success else 1)
2865
+
2866
+ if args.dry_run:
2867
+ print("🔍 DRY RUN: Discovering PRs only")
2868
+ prs = monitor.discover_open_prs(cutoff_hours=args.cutoff_hours)
2869
+
2870
+ if args.single_repo:
2871
+ prs = [pr for pr in prs if pr["repository"] == args.single_repo]
2872
+
2873
+ print(f"📋 Found {len(prs)} open PRs:")
2874
+ for pr in prs[:args.max_prs]:
2875
+ print(f" • {pr['repository']} PR #{pr['number']}: {pr['title']}")
2876
+ return
2877
+
2878
+ monitor.run_monitoring_cycle(
2879
+ single_repo=args.single_repo,
2880
+ max_prs=args.max_prs,
2881
+ cutoff_hours=args.cutoff_hours,
2882
+ fix_comment=True,
2883
+ agent_cli=args.fixpr_agent,
2884
+ model=args.model,
2885
+ )
2886
+ return
1184
2887
 
1185
2888
  # Handle target PR processing
1186
2889
  if args.target_pr and args.target_repo:
@@ -1190,7 +2893,7 @@ def main():
1190
2893
 
1191
2894
  if args.dry_run:
1192
2895
  print("🔍 DRY RUN: Discovering PRs only")
1193
- prs = monitor.discover_open_prs()
2896
+ prs = monitor.discover_open_prs(cutoff_hours=args.cutoff_hours)
1194
2897
 
1195
2898
  if args.single_repo:
1196
2899
  prs = [pr for pr in prs if pr["repository"] == args.single_repo]
@@ -1198,9 +2901,26 @@ def main():
1198
2901
  print(f"📋 Found {len(prs)} open PRs:")
1199
2902
  for pr in prs[:args.max_prs]:
1200
2903
  print(f" • {pr['repository']} PR #{pr['number']}: {pr['title']}")
2904
+
2905
+ if args.list_eligible:
2906
+ print("\n🔎 Eligible for fixpr (conflicts/failing checks):")
2907
+ monitor.list_actionable_prs(
2908
+ cutoff_hours=args.cutoff_hours,
2909
+ max_prs=args.max_prs,
2910
+ single_repo=args.single_repo,
2911
+ )
2912
+ elif args.list_eligible:
2913
+ monitor.list_actionable_prs(
2914
+ cutoff_hours=args.cutoff_hours,
2915
+ max_prs=args.max_prs,
2916
+ single_repo=args.single_repo,
2917
+ )
1201
2918
  else:
1202
- monitor.run_monitoring_cycle(single_repo=args.single_repo, max_prs=args.max_prs)
2919
+ monitor.run_monitoring_cycle(
2920
+ cutoff_hours=args.cutoff_hours,
2921
+ model=args.model,
2922
+ )
1203
2923
 
1204
2924
 
1205
- if __name__ == '__main__':
2925
+ if __name__ == "__main__":
1206
2926
  main()