jleechanorg-pr-automation 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jleechanorg-pr-automation might be problematic.
- jleechanorg_pr_automation/__init__.py +32 -0
- jleechanorg_pr_automation/automation_safety_manager.py +700 -0
- jleechanorg_pr_automation/automation_safety_wrapper.py +116 -0
- jleechanorg_pr_automation/automation_utils.py +314 -0
- jleechanorg_pr_automation/check_codex_comment.py +76 -0
- jleechanorg_pr_automation/codex_branch_updater.py +272 -0
- jleechanorg_pr_automation/codex_config.py +57 -0
- jleechanorg_pr_automation/jleechanorg_pr_monitor.py +1202 -0
- jleechanorg_pr_automation/tests/conftest.py +12 -0
- jleechanorg_pr_automation/tests/test_actionable_counting_matrix.py +221 -0
- jleechanorg_pr_automation/tests/test_automation_over_running_reproduction.py +147 -0
- jleechanorg_pr_automation/tests/test_automation_safety_limits.py +340 -0
- jleechanorg_pr_automation/tests/test_automation_safety_manager_comprehensive.py +615 -0
- jleechanorg_pr_automation/tests/test_codex_actor_matching.py +137 -0
- jleechanorg_pr_automation/tests/test_graphql_error_handling.py +155 -0
- jleechanorg_pr_automation/tests/test_pr_filtering_matrix.py +473 -0
- jleechanorg_pr_automation/tests/test_pr_targeting.py +95 -0
- jleechanorg_pr_automation/utils.py +232 -0
- jleechanorg_pr_automation-0.1.0.dist-info/METADATA +217 -0
- jleechanorg_pr_automation-0.1.0.dist-info/RECORD +23 -0
- jleechanorg_pr_automation-0.1.0.dist-info/WHEEL +5 -0
- jleechanorg_pr_automation-0.1.0.dist-info/entry_points.txt +3 -0
- jleechanorg_pr_automation-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1202 @@

#!/usr/bin/env python3
"""
jleechanorg PR Monitor - Cross-Organization Automation

Discovers and processes open PRs across the jleechanorg organization by
posting configurable automation comments with safety limits integration.
"""

import argparse
import os
import sys
import json
import subprocess
import logging
import re
import traceback
from collections import Counter
from pathlib import Path
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Tuple
from .automation_safety_manager import AutomationSafetyManager
from .utils import setup_logging, json_manager
from .automation_utils import AutomationUtils

from .codex_config import (
    CODEX_COMMIT_MARKER_PREFIX as SHARED_MARKER_PREFIX,
    CODEX_COMMIT_MARKER_SUFFIX as SHARED_MARKER_SUFFIX,
)


class JleechanorgPRMonitor:
    """Cross-organization PR monitoring with Codex automation comments"""

    @staticmethod
    def _redact_email(email: Optional[str]) -> Optional[str]:
        """Redact email for logging while preserving domain for debugging"""
        if not email or '@' not in email:
            return email
        user, domain = email.rsplit('@', 1)
        if len(user) <= 2:
            return f"***@{domain}"
        return f"{user[:2]}***@{domain}"

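    # Illustrative examples for _redact_email (addresses are hypothetical):
    # _redact_email("alice@example.com") -> "al***@example.com";
    # _redact_email("ab@example.com") -> "***@example.com", since local parts of
    # one or two characters are fully masked.
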
    CODEX_COMMIT_MARKER_PREFIX = SHARED_MARKER_PREFIX
    CODEX_COMMIT_MARKER_SUFFIX = SHARED_MARKER_SUFFIX
    CODEX_COMMIT_MESSAGE_MARKER = "[codex-automation-commit]"
    CODEX_BOT_IDENTIFIER = "codex"
    # GitHub short SHAs display with a minimum of 7 characters, while full SHAs are 40 characters.
    CODEX_COMMIT_SHA_LENGTH_RANGE: Tuple[int, int] = (7, 40)
    CODEX_SUMMARY_COMMIT_PATTERNS = [
        re.compile(
            rf"/blob/([0-9a-fA-F]{{{CODEX_COMMIT_SHA_LENGTH_RANGE[0]},{CODEX_COMMIT_SHA_LENGTH_RANGE[1]}}})/"
        ),
        re.compile(
            rf"/commit/([0-9a-fA-F]{{{CODEX_COMMIT_SHA_LENGTH_RANGE[0]},{CODEX_COMMIT_SHA_LENGTH_RANGE[1]}}})"
        ),
        # Cursor Bugbot summaries reference the pending Codex commit in prose, e.g.
        # "Written by Cursor Bugbot for commit c279655."
        re.compile(
            rf"\bcommit\b[^0-9a-fA-F]{{0,5}}([0-9a-fA-F]{{{CODEX_COMMIT_SHA_LENGTH_RANGE[0]},{CODEX_COMMIT_SHA_LENGTH_RANGE[1]}}})",
            re.IGNORECASE,
        ),
    ]

    _HEAD_COMMIT_DETAILS_QUERY = """
    query($owner: String!, $name: String!, $prNumber: Int!) {
      repository(owner: $owner, name: $name) {
        pullRequest(number: $prNumber) {
          headRefOid
          commits(last: 1) {
            nodes {
              commit {
                oid
                messageHeadline
                message
                author {
                  email
                  name
                  user { login }
                }
                committer {
                  email
                  name
                  user { login }
                }
              }
            }
          }
        }
      }
    }
    """

    _codex_actor_keywords = [
        "codex",
        "coderabbitai",
        "coderabbit",
        "copilot",
        "cursor",
    ]
    _codex_actor_patterns = [
        re.compile(rf"\b{keyword}\b", re.IGNORECASE)
        for keyword in _codex_actor_keywords
    ]
    _codex_commit_message_pattern_str = (
        r"\[(?:" + "|".join(_codex_actor_keywords) + r")-automation-commit\]"
    )
    _codex_commit_message_pattern = re.compile(
        _codex_commit_message_pattern_str,
        re.IGNORECASE,
    )

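    # Illustrative matches for _codex_commit_message_pattern: "[codex-automation-commit]"
    # or "[Cursor-Automation-Commit]" anywhere in a commit message, since the pattern
    # is compiled with re.IGNORECASE.
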
    @staticmethod
    def _extract_actor_fields(
        actor: Optional[Dict],
    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return (login, email, name) from a GraphQL commit actor block."""
        if not isinstance(actor, dict):
            return (None, None, None)

        user_info = actor.get("user")
        login = user_info.get("login") if isinstance(user_info, dict) else None
        email = actor.get("email")
        name = actor.get("name")
        return (login, email, name)

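    # Illustrative input/output for _extract_actor_fields:
    #   {"email": "e@x.io", "name": "E", "user": {"login": "e"}} -> ("e", "e@x.io", "E")
    #   any non-dict input -> (None, None, None)
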
    def __init__(self):
        self.logger = setup_logging(__name__)

        self.assistant_mentions = os.environ.get(
            "AI_ASSISTANT_MENTIONS",
            "@codex @coderabbitai @copilot @cursor",
        )

        self.wrapper_managed = os.environ.get("AUTOMATION_SAFETY_WRAPPER") == "1"

        # Processing history persisted to permanent location
        self.history_base_dir = Path.home() / "Library" / "Logs" / "worldarchitect-automation" / "pr_history"
        self.history_base_dir.mkdir(parents=True, exist_ok=True)

        # Organization settings
        self.organization = "jleechanorg"
        self.base_project_dir = Path.home() / "projects"

        safety_data_dir = os.environ.get('AUTOMATION_SAFETY_DATA_DIR')
        if not safety_data_dir:
            default_dir = Path.home() / "Library" / "Application Support" / "worldarchitect-automation"
            default_dir.mkdir(parents=True, exist_ok=True)
            safety_data_dir = str(default_dir)

        self.safety_manager = AutomationSafetyManager(safety_data_dir)

        self.logger.info("🏢 Initialized jleechanorg PR monitor")
        self.logger.info(f"📁 History storage: {self.history_base_dir}")
        self.logger.info("💬 Comment-only automation mode")

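    # Environment overrides read in __init__ above (values illustrative):
    #   AI_ASSISTANT_MENTIONS="@codex"            narrows which assistants get pinged
    #   AUTOMATION_SAFETY_WRAPPER="1"             marks the run as wrapper-managed
    #   AUTOMATION_SAFETY_DATA_DIR="/tmp/safety"  relocates safety-manager state
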
    def _get_history_file(self, repo_name: str, branch_name: str) -> Path:
        """Get history file path for specific repo/branch"""
        repo_dir = self.history_base_dir / repo_name
        repo_dir.mkdir(parents=True, exist_ok=True)

        # Replace slashes in branch names to avoid creating nested directories
        safe_branch_name = branch_name.replace('/', '_')
        return repo_dir / f"{safe_branch_name}.json"

    def _load_branch_history(self, repo_name: str, branch_name: str) -> Dict[str, str]:
        """Load processed PRs for a specific repo/branch"""
        history_file = self._get_history_file(repo_name, branch_name)
        return json_manager.read_json(str(history_file), {})

    def _save_branch_history(self, repo_name: str, branch_name: str, history: Dict[str, str]) -> None:
        """Save processed PRs for a specific repo/branch"""
        history_file = self._get_history_file(repo_name, branch_name)
        if not json_manager.write_json(str(history_file), history):
            self.logger.error(f"❌ Error saving history for {repo_name}/{branch_name}: write failed")

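    # Illustrative on-disk layout for the history helpers above: one JSON object per
    # repo/branch at ~/Library/Logs/worldarchitect-automation/pr_history/<repo>/<branch>.json,
    # mapping PR number to the last processed head SHA, e.g. {"123": "abc123..."}.
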
    def _should_skip_pr(self, repo_name: str, branch_name: str, pr_number: int, current_commit: str) -> bool:
        """Check if PR should be skipped based on recent processing"""
        history = self._load_branch_history(repo_name, branch_name)
        pr_key = str(pr_number)

        # If we haven't processed this PR before, don't skip
        if pr_key not in history:
            return False

        # If commit has changed since we processed it, don't skip
        last_processed_commit = history[pr_key]
        if last_processed_commit != current_commit:
            self.logger.info(f"🔄 PR {repo_name}/{branch_name}#{pr_number} has new commit ({current_commit[:8]} vs {last_processed_commit[:8]})")
            return False

        # We processed this PR with this exact commit, skip it
        self.logger.info(f"⏭️ Skipping PR {repo_name}/{branch_name}#{pr_number} - already processed commit {current_commit[:8]}")
        return True

    def _record_processed_pr(self, repo_name: str, branch_name: str, pr_number: int, commit_sha: str) -> None:
        """Record that we've processed a PR with a specific commit"""
        history = self._load_branch_history(repo_name, branch_name)
        pr_key = str(pr_number)
        history[pr_key] = commit_sha
        self._save_branch_history(repo_name, branch_name, history)
        self.logger.debug(f"📝 Recorded processing of PR {repo_name}/{branch_name}#{pr_number} with commit {commit_sha[:8]}")

    # TDD GREEN: Implement methods for PR filtering and actionable counting
    def _record_pr_processing(self, repo_name: str, branch_name: str, pr_number: int, commit_sha: str) -> None:
        """Record that a PR has been processed (alias for compatibility)"""
        self._record_processed_pr(repo_name, branch_name, pr_number, commit_sha)

    def _normalize_repository_name(self, repository: str) -> str:
        """Return full owner/repo identifier for GitHub CLI operations."""

        if not repository:
            return repository

        if "/" in repository:
            return repository

        return f"{self.organization}/{repository}"

    def is_pr_actionable(self, pr_data: Dict) -> bool:
        """Determine if a PR is actionable (should be processed)"""
        # Closed PRs are not actionable
        if pr_data.get('state', '').lower() != 'open':
            return False

        # PRs with no commits are not actionable
        head_ref_oid = pr_data.get('headRefOid')
        if not head_ref_oid:
            return False

        # Check if already processed with this commit
        repo_name = pr_data.get('repository', '')
        branch_name = pr_data.get('headRefName', '')
        pr_number = pr_data.get('number', 0)

        if self._should_skip_pr(repo_name, branch_name, pr_number, head_ref_oid):
            return False

        # Open PRs (including drafts) with new commits are actionable
        return True

    def filter_eligible_prs(self, pr_list: List[Dict]) -> List[Dict]:
        """Filter list to return only actionable PRs"""
        eligible = []
        for pr in pr_list:
            if self.is_pr_actionable(pr):
                eligible.append(pr)
        return eligible

    def process_actionable_prs(self, pr_list: List[Dict], target_count: int) -> int:
        """Process up to target_count actionable PRs, returning count processed"""
        processed = 0
        for pr in pr_list:
            if processed >= target_count:
                break
            if self.is_pr_actionable(pr):
                # Simulate processing (for testing)
                processed += 1
        return processed

    def filter_and_process_prs(self, pr_list: List[Dict], target_actionable_count: int) -> int:
        """Filter PRs to actionable ones and process up to target count"""
        eligible_prs = self.filter_eligible_prs(pr_list)
        return self.process_actionable_prs(eligible_prs, target_actionable_count)

    def find_eligible_prs(self, limit: int = 10) -> List[Dict]:
        """Find eligible PRs from live GitHub data"""
        all_prs = self.discover_open_prs()
        eligible_prs = self.filter_eligible_prs(all_prs)
        return eligible_prs[:limit]

    def run_monitoring_cycle_with_actionable_count(self, target_actionable_count: int = 20) -> Dict:
        """Enhanced monitoring cycle that processes exactly target actionable PRs"""
        all_prs = self.discover_open_prs()

        # Sort by most recently updated first
        all_prs.sort(key=lambda pr: pr.get('updatedAt', ''), reverse=True)

        actionable_processed = 0
        skipped_count = 0
        processing_failures = 0

        # Count ALL non-actionable PRs as skipped, not just those we encounter before target
        for pr in all_prs:
            if not self.is_pr_actionable(pr):
                skipped_count += 1

        # Process actionable PRs up to target
        for pr in all_prs:
            if actionable_processed >= target_actionable_count:
                break

            if not self.is_pr_actionable(pr):
                continue  # Already counted in skipped above

            # Attempt to process the PR
            repo_name = pr.get('repository', '')
            pr_number = pr.get('number', 0)
            repo_full = pr.get('repositoryFullName', f"jleechanorg/{repo_name}")

            # Reserve a processing slot for this PR
            if not self.safety_manager.try_process_pr(pr_number, repo=repo_full):
                self.logger.info(f"⚠️ PR {repo_full}#{pr_number} blocked by safety manager - consecutive failures or rate limit")
                processing_failures += 1
                continue

            try:
                success = self._process_pr_comment(repo_name, pr_number, pr)
                if success:
                    actionable_processed += 1
                else:
                    processing_failures += 1
            except Exception as e:
                self.logger.error(f"Error processing PR {repo_name}#{pr_number}: {e}")
                processing_failures += 1
            finally:
                # Always release the processing slot
                self.safety_manager.release_pr_slot(pr_number, repo=repo_full)

        return {
            'actionable_processed': actionable_processed,
            'total_discovered': len(all_prs),
            'skipped_count': skipped_count,
            'processing_failures': processing_failures
        }

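    # Illustrative return value of run_monitoring_cycle_with_actionable_count (the
    # numbers are made up): {'actionable_processed': 5, 'total_discovered': 42,
    # 'skipped_count': 30, 'processing_failures': 7}.
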
    def _process_pr_comment(self, repo_name: str, pr_number: int, pr_data: Dict) -> bool:
        """Process a PR by posting a comment (used by tests and enhanced monitoring)"""
        try:
            # Use the existing comment posting method
            repo_full_name = pr_data.get('repositoryFullName', f"jleechanorg/{repo_name}")
            result = self.post_codex_instruction_simple(repo_full_name, pr_number, pr_data)
            # Return True only if comment was actually posted
            return result == "posted"
        except Exception as e:
            self.logger.error(f"Error processing comment for PR {repo_name}#{pr_number}: {e}")
            return False

    def discover_open_prs(self) -> List[Dict]:
        """Discover open PRs updated in the last 24 hours across the organization."""

        self.logger.info(f"🔍 Discovering open PRs in {self.organization} organization (last 24 hours)")

        now = datetime.utcnow()
        one_day_ago = now - timedelta(hours=24)
        self.logger.info("📅 Filtering PRs updated since: %s UTC", one_day_ago.strftime('%Y-%m-%d %H:%M:%S'))

        graphql_query = '''
        query($searchQuery: String!, $cursor: String) {
          search(type: ISSUE, query: $searchQuery, first: 100, after: $cursor) {
            nodes {
              __typename
              ... on PullRequest {
                number
                title
                headRefName
                baseRefName
                updatedAt
                url
                author { login resourcePath url }
                headRefOid
                state
                isDraft
                repository { name nameWithOwner }
              }
            }
            pageInfo { hasNextPage endCursor }
          }
        }
        '''

        search_query = f"org:{self.organization} is:pr is:open"
        cursor: Optional[str] = None
        recent_prs: List[Dict] = []

        while True:
            gh_api_cmd = [
                "gh",
                "api",
                "graphql",
                "-f",
                f"query={graphql_query}",
                "-f",
                f"searchQuery={search_query}",
            ]
            if cursor:
                gh_api_cmd.extend(["-f", f"cursor={cursor}"])

            api_result = AutomationUtils.execute_subprocess_with_timeout(gh_api_cmd, timeout=60, check=False)
            if api_result.returncode != 0:
                raise RuntimeError(f"GraphQL search failed: {api_result.stderr.strip()}")

            try:
                api_data = json.loads(api_result.stdout)
            except json.JSONDecodeError as exc:
                self.logger.error("❌ Failed to parse GraphQL response: %s", exc)
                raise

            search_data = api_data.get('data', {}).get('search')
            if not search_data:
                break

            nodes = search_data.get('nodes', [])
            for node in nodes:
                if node.get('__typename') != 'PullRequest':
                    continue

                updated_str = node.get('updatedAt')
                if not updated_str:
                    continue

                try:
                    updated_time = datetime.fromisoformat(updated_str.replace('Z', '+00:00')).replace(tzinfo=None)
                except ValueError:
                    self.logger.debug(
                        "⚠️ Invalid date format for PR %s: %s", node.get('number'), updated_str
                    )
                    continue

                if updated_time < one_day_ago:
                    continue

                repo_info = node.get('repository') or {}
                author_info = node.get('author') or {}
                if 'login' not in author_info:
                    # Ensure a 'login' key exists (None when GitHub omits the author)
                    author_info = {**author_info, 'login': author_info.get('login')}

                normalized = {
                    'number': node.get('number'),
                    'title': node.get('title'),
                    'headRefName': node.get('headRefName'),
                    'baseRefName': node.get('baseRefName'),
                    'updatedAt': updated_str,
                    'url': node.get('url'),
                    'author': author_info,
                    'headRefOid': node.get('headRefOid'),
                    'state': node.get('state'),
                    'isDraft': node.get('isDraft'),
                    'repository': repo_info.get('name'),
                    'repositoryFullName': repo_info.get('nameWithOwner'),
                    'updated_datetime': updated_time,
                }
                recent_prs.append(normalized)

            page_info = search_data.get('pageInfo') or {}
            if not page_info.get('hasNextPage'):
                break

            cursor = page_info.get('endCursor')
            if not cursor:
                break

        if not recent_prs:
            self.logger.info("📭 No recent open PRs discovered")
            return []

        recent_prs.sort(key=lambda x: x.get('updated_datetime', datetime.min), reverse=True)

        repo_counter = Counter(pr.get('repository') for pr in recent_prs if pr.get('repository'))
        for repo_name, count in repo_counter.items():
            self.logger.info("📋 %s: %s recent PRs", repo_name, count)

        self.logger.info("🎯 Total recent PRs discovered (last 24 hours): %s", len(recent_prs))

        self.logger.info("📊 Most recently updated PRs:")
        for i, pr in enumerate(recent_prs[:5], 1):
            updated_str = pr['updated_datetime'].strftime('%Y-%m-%d %H:%M')
            self.logger.info("  %s. %s #%s - %s", i, pr['repositoryFullName'], pr['number'], updated_str)

        return recent_prs

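    # The paging loop above is roughly equivalent to this shell session (illustrative):
    #   gh api graphql -f query='<query>' -f searchQuery='org:jleechanorg is:pr is:open'
    # re-run with an extra -f cursor=<pageInfo.endCursor> until pageInfo.hasNextPage
    # is false.
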
    def _find_local_repository(self, repo_name: str) -> Optional[Path]:
        """Find local repository path for given repo name"""

        def is_git_repository(path: Path) -> bool:
            """Check if path is a git repository"""
            git_path = path / ".git"
            return git_path.exists()

        # Check current working directory first
        current_dir = Path.cwd()
        if is_git_repository(current_dir):
            # Check if this is related to the target repository
            if repo_name.lower() in current_dir.name.lower() or "worldarchitect" in current_dir.name.lower():
                self.logger.debug(f"🎯 Found local repo (current dir): {current_dir}")
                return current_dir

        # Common patterns for local repositories
        search_paths = [
            # Standard patterns in ~/projects/
            self.base_project_dir / repo_name,
            self.base_project_dir / f"{repo_name}_worker",
            self.base_project_dir / f"{repo_name}_worker1",
            self.base_project_dir / f"{repo_name}_worker2",
            # Project patterns in home directory
            Path.home() / f"project_{repo_name}",
            Path.home() / f"project_{repo_name}" / repo_name,
            # Nested repository patterns
            Path.home() / f"project_{repo_name}_frontend" / f"{repo_name}_frontend",
        ]

        for path in search_paths:
            if path.exists() and is_git_repository(path):
                self.logger.debug(f"🎯 Found local repo: {path}")
                return path

        # Search for any directory containing the repo name in ~/projects/
        if self.base_project_dir.exists():
            for path in self.base_project_dir.iterdir():
                if path.is_dir() and repo_name.lower() in path.name.lower():
                    if is_git_repository(path):
                        self.logger.debug(f"🎯 Found local repo (fuzzy): {path}")
                        return path

        # Search for project_* patterns in home directory
        home_dir = Path.home()
        for path in home_dir.iterdir():
            if path.is_dir() and path.name.startswith(f"project_{repo_name}"):
                # Check if it's a direct repo
                if is_git_repository(path):
                    self.logger.debug(f"🎯 Found local repo (home): {path}")
                    return path
                # Check if repo is nested inside
                nested_repo = path / repo_name
                if nested_repo.exists() and is_git_repository(nested_repo):
                    self.logger.debug(f"🎯 Found local repo (nested): {nested_repo}")
                    return nested_repo

        return None

    def post_codex_instruction_simple(self, repository: str, pr_number: int, pr_data: Dict) -> str:
        """Post codex instruction comment to PR"""
        repo_full = self._normalize_repository_name(repository)
        self.logger.info(f"💬 Requesting Codex support for {repo_full} PR #{pr_number}")

        # Get current PR state including commit SHA
        head_sha, comments = self._get_pr_comment_state(repo_full, pr_number)
        head_commit_details = None
        if head_sha:
            head_commit_details = self._get_head_commit_details(repo_full, pr_number, head_sha)
        if head_commit_details and self._is_head_commit_from_codex(head_commit_details):
            self.logger.debug(
                "🆔 Head commit %s for %s#%s already attributed to Codex",
                head_sha[:8],
                repo_full,
                pr_number,
            )
            return "skipped"

        # Extract repo name and branch from PR data
        repo_name = repo_full.split('/')[-1]
        branch_name = pr_data.get('headRefName', 'unknown')

        if not head_sha:
            self.logger.warning(
                f"⚠️ Could not determine commit SHA for PR #{pr_number}; proceeding without marker gating"
            )
        else:
            # Check if we should skip this PR based on commit-based tracking
            if self._should_skip_pr(repo_name, branch_name, pr_number, head_sha):
                self.logger.info(f"⏭️ Skipping PR #{pr_number} - already processed this commit")
                return "skipped"

            if self._has_codex_comment_for_commit(comments, head_sha):
                self.logger.info(
                    f"♻️ Codex instruction already posted for commit {head_sha[:8]} on PR #{pr_number}, skipping"
                )
                self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
                return "skipped"

            if self._has_pending_codex_commit(comments, head_sha):
                self.logger.info(
                    f"⏳ Pending Codex automation commit {head_sha[:8]} detected on PR #{pr_number}; skipping re-run"
                )
                self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)
                return "skipped"

        # Build comment body that tells Codex to fix PR comments and failing tests
        comment_body = self._build_codex_comment_body_simple(
            repo_full,
            pr_number,
            pr_data,
            head_sha,
        )

        # Post the comment
        try:
            comment_cmd = [
                "gh", "pr", "comment", str(pr_number),
                "--repo", repo_full,
                "--body", comment_body
            ]

            AutomationUtils.execute_subprocess_with_timeout(comment_cmd, timeout=30)

            self.logger.info(f"✅ Posted Codex instruction comment on PR #{pr_number} ({repo_full})")

            # Record that we've processed this PR with this commit when available
            if head_sha:
                self._record_processed_pr(repo_name, branch_name, pr_number, head_sha)

            return "posted"

        except subprocess.CalledProcessError as e:
            self.logger.error(f"❌ Failed to post comment on PR #{pr_number}: {e.stderr}")
            return "failed"
        except Exception as e:
            self.logger.error(f"💥 Unexpected error posting comment: {e}")
            return "failed"

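    # post_codex_instruction_simple returns one of three strings: "posted", "skipped",
    # or "failed"; callers treat only "posted" as a successful processing attempt.
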
    def _are_tests_passing(self, repository: str, pr_number: int) -> bool:
        """Check if tests are passing on the PR"""
        try:
            # Get PR status checks
            result = AutomationUtils.execute_subprocess_with_timeout([
                "gh", "pr", "view", str(pr_number),
                "--repo", repository,
                "--json", "statusCheckRollup"
            ], timeout=30)

            pr_status = json.loads(result.stdout)
            status_checks = pr_status.get('statusCheckRollup', [])

            # If no status checks are configured, assume tests are failing
            if not status_checks:
                self.logger.debug(f"⚠️ No status checks configured for PR #{pr_number}, assuming failing")
                return False

            # Check if all status checks are successful
            for check in status_checks:
                if check.get('state') not in ['SUCCESS', 'NEUTRAL']:
                    self.logger.debug(f"❌ Status check failed: {check.get('name')} - {check.get('state')}")
                    return False

            self.logger.debug(f"✅ All {len(status_checks)} status checks passing for PR #{pr_number}")
            return True

        except Exception as e:
            self.logger.warning(f"⚠️ Could not check test status for PR #{pr_number}: {e}")
            return False  # Assume tests are failing if we can't check

    def _build_codex_comment_body_simple(
        self,
        repository: str,
        pr_number: int,
        pr_data: Dict,
        head_sha: str,
    ) -> str:
        """Build comment body that tells all AI assistants to fix PR comments, tests, and merge conflicts"""

        comment_body = f"""{self.assistant_mentions} [AI automation] Please make the following changes to this PR

**Summary (Execution Flow):**
1. Review every outstanding PR comment to understand required fixes and clarifications.
2. Implement code or configuration updates that address each comment, then reply with explicit DONE/NOT DONE outcomes plus context.
3. Run the relevant test suites locally and in CI, repairing any failures until the checks report success.
4. Rebase or merge with the base branch to clear conflicts, then push the updated commits to this PR.
5. Perform a final self-review to confirm linting, formatting, and documentation standards are met before handoff.

**PR Details:**
- Title: {pr_data.get('title', 'Unknown')}
- Author: {pr_data.get('author', {}).get('login', 'unknown')}
- Branch: {pr_data.get('headRefName', 'unknown')}
- Commit: {head_sha[:8] if head_sha else 'unknown'} ({head_sha or 'unknown'})

**Instructions:**
Use your judgment to fix comments from everyone or explain why they should not be fixed. Follow the binary response protocol - every comment needs an explicit "DONE" or "NOT DONE" classification with an explanation. Address all comments on this PR. Fix any failing tests and resolve merge conflicts. Push any commits needed to remote so the PR is updated.

**Tasks:**
1. **Address all comments** - Review and implement ALL feedback from reviewers
2. **Fix failing tests** - Review test failures and implement fixes
3. **Resolve merge conflicts** - Handle any conflicts with the base branch
4. **Ensure code quality** - Follow project standards and best practices

**Automation Markers:**
- Leave the hidden comment marker `<!-- codex-automation-commit:... -->` in this thread so we only re-ping you after new commits.
- Include `{self.CODEX_COMMIT_MESSAGE_MARKER}` in the commit message of your next push so we can confirm Codex authored it (even if the author/committer metadata already shows Codex).
"""

        if head_sha:
            comment_body += (
                f"\n\n{self.CODEX_COMMIT_MARKER_PREFIX}{head_sha}"
                f"{self.CODEX_COMMIT_MARKER_SUFFIX}"
            )

        return comment_body

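    # Illustrative trailer appended when head_sha is known (assuming the shared
    # markers from codex_config render as an HTML comment, as the template text
    # above suggests): "<!-- codex-automation-commit:<full-40-char-sha> -->".
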
    def _get_pr_comment_state(self, repo_full_name: str, pr_number: int) -> Tuple[Optional[str], List[Dict]]:
        """Fetch PR comment data needed for Codex comment gating"""
        view_cmd = [
            "gh",
            "pr",
            "view",
            str(pr_number),
            "--repo",
            repo_full_name,
            "--json",
            "headRefOid,comments",
        ]

        try:
            result = AutomationUtils.execute_subprocess_with_timeout(
                view_cmd,
                timeout=30
            )
            pr_data = json.loads(result.stdout or "{}")
            head_sha = pr_data.get("headRefOid")

            # Handle different comment structures from GitHub API
            comments_data = pr_data.get("comments", [])
            if isinstance(comments_data, dict):
                comments = comments_data.get("nodes", [])
            elif isinstance(comments_data, list):
                comments = comments_data
            else:
                comments = []

            # Ensure comments are sorted by creation time (oldest first)
            # GitHub API should return them sorted, but let's be explicit
            comments.sort(
                key=lambda c: (c.get('createdAt') or c.get('updatedAt') or '')
            )

            return head_sha, comments
        except subprocess.CalledProcessError as e:
            error_message = e.stderr.strip() if e.stderr else str(e)
            self.logger.warning(
                f"⚠️ Failed to fetch PR comment state for PR #{pr_number}: {error_message}"
            )
        except json.JSONDecodeError as e:
            self.logger.warning(
                f"⚠️ Failed to parse PR comment state for PR #{pr_number}: {e}"
            )

        return None, []

    def _get_head_commit_details(
        self,
        repo_full_name: str,
        pr_number: int,
        expected_sha: Optional[str] = None,
    ) -> Optional[Dict[str, Optional[str]]]:
        """Fetch metadata for the PR head commit using the GitHub GraphQL API."""

        if "/" not in repo_full_name:
            self.logger.debug(
                "⚠️ Cannot fetch commit details for %s - invalid repo format",
                repo_full_name,
            )
            return None

        owner, name = repo_full_name.split("/", 1)

        # Validate GitHub naming constraints (alphanumeric, hyphens, periods, underscores, max 100 chars)
        github_name_pattern = re.compile(r'^[a-zA-Z0-9]([a-zA-Z0-9\-\._]{0,98}[a-zA-Z0-9])?$')
        if not github_name_pattern.match(owner) or not github_name_pattern.match(name):
            self.logger.warning(
                "⚠️ Invalid GitHub identifiers: owner='%s', name='%s'",
                owner,
                name,
            )
            return None

        # Validate PR number is a positive integer
        if not isinstance(pr_number, int) or pr_number <= 0:
            self.logger.warning("⚠️ Invalid PR number: %s", pr_number)
            return None

        cmd = [
            "gh",
            "api",
            "graphql",
            "-f",
            f"query={self._HEAD_COMMIT_DETAILS_QUERY}",
            "-f",
            f"owner={owner}",
            "-f",
            f"name={name}",
            "-F",
            f"prNumber={pr_number}",
        ]

        try:
            result = AutomationUtils.execute_subprocess_with_timeout(cmd, timeout=30)
        except subprocess.CalledProcessError as exc:
            self.logger.debug(
                "⚠️ Failed to fetch head commit details for %s#%s: %s",
                repo_full_name,
                pr_number,
                exc.stderr or exc,
            )
            return None
        except Exception as exc:
            self.logger.debug(
                "⚠️ Error executing head commit lookup for %s#%s: %s",
                repo_full_name,
                pr_number,
                exc,
            )
            return None

        try:
            data = json.loads(result.stdout or "{}")
        except json.JSONDecodeError as exc:
            self.logger.debug(
                "⚠️ Failed to decode commit details for %s#%s: %s",
                repo_full_name,
                pr_number,
                exc,
            )
            return None

        pr_data = (
            data.get("data", {})
            .get("repository", {})
            .get("pullRequest", {})
        )
        commits_data = pr_data.get("commits") or {}
        commit_nodes = commits_data.get("nodes") if isinstance(commits_data, dict) else None
        if not commit_nodes or not isinstance(commit_nodes, list):
            return None

        commit_info = commit_nodes[-1].get("commit")
        if not commit_info:
            return None

        commit_sha = commit_info.get("oid")
        if expected_sha and commit_sha and commit_sha != expected_sha:
            # If GitHub served stale data, ignore it to avoid mismatched metadata.
            return None

        author_info = commit_info.get("author") or {}
        committer_info = commit_info.get("committer") or {}

        author_login, author_email, author_name = self._extract_actor_fields(author_info)
        committer_login, committer_email, committer_name = self._extract_actor_fields(committer_info)

        # Log commit detection with redacted emails for privacy
        self.logger.debug(
            "📧 Commit %s: author=%s (%s), committer=%s (%s)",
            commit_sha[:8] if commit_sha else "unknown",
            author_login or "unknown",
            self._redact_email(author_email) if author_email else "no-email",
            committer_login or "unknown",
            self._redact_email(committer_email) if committer_email else "no-email",
        )

        return {
            "sha": commit_sha,
            "author_login": author_login,
            "author_email": author_email,
            "author_name": author_name,
            "committer_login": committer_login,
            "committer_email": committer_email,
            "committer_name": committer_name,
            "message_headline": commit_info.get("messageHeadline"),
            "message": commit_info.get("message"),
        }

    def _extract_commit_marker(self, comment_body: str) -> Optional[str]:
        """Extract commit marker from Codex automation comment"""
        if not comment_body:
            return None

        prefix_index = comment_body.find(self.CODEX_COMMIT_MARKER_PREFIX)
        if prefix_index == -1:
            return None

        start_index = prefix_index + len(self.CODEX_COMMIT_MARKER_PREFIX)
        end_index = comment_body.find(self.CODEX_COMMIT_MARKER_SUFFIX, start_index)
        if end_index == -1:
            return None

        return comment_body[start_index:end_index].strip()

    def _has_codex_comment_for_commit(self, comments: List[Dict], head_sha: str) -> bool:
        """Determine if Codex instruction already exists for the latest commit"""
        if not head_sha:
            return False

        for comment in comments:
            body = comment.get("body", "")
            marker_sha = self._extract_commit_marker(body)
            if marker_sha and marker_sha == head_sha:
                return True

        return False

    def _is_head_commit_from_codex(
        self, commit_details: Optional[Dict[str, Optional[str]]]
    ) -> bool:
        """Determine if the head commit was authored or marked by Codex."""

        if not commit_details:
            return False

        actor_fields = [
            commit_details.get("author_login"),
            commit_details.get("author_email"),
            commit_details.get("author_name"),
            commit_details.get("committer_login"),
            commit_details.get("committer_email"),
            commit_details.get("committer_name"),
        ]

        for field in actor_fields:
            if field and isinstance(field, str):
                if any(pattern.search(field) for pattern in self._codex_actor_patterns):
                    return True

        message_values = [
            commit_details.get("message_headline"),
            commit_details.get("message"),
        ]

        for message in message_values:
            if message and isinstance(message, str):
                if self._codex_commit_message_pattern.search(message):
                    return True

        return False

    def _get_comment_author_login(self, comment: Dict) -> str:
        """Return normalized author login for a comment."""
        author = comment.get("author") or comment.get("user") or {}
        if isinstance(author, dict):
            return (author.get("login") or author.get("name") or "").strip()
        if isinstance(author, str):
            return author.strip()
        return ""

    def _extract_codex_summary_commit(self, comment_body: str) -> Optional[str]:
        """Extract commit SHA referenced in Codex summary comment."""
        if not comment_body:
            return None

        for pattern in self.CODEX_SUMMARY_COMMIT_PATTERNS:
            match = pattern.search(comment_body)
            if match:
                return match.group(1).lower()

        return None

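    # Illustrative matches for _extract_codex_summary_commit (SHAs and URLs are
    # hypothetical):
    #   "https://github.com/o/r/blob/abc1234/f.py"        -> "abc1234"
    #   "https://github.com/o/r/commit/abc1234"           -> "abc1234"
    #   "Written by Cursor Bugbot for commit c279655."    -> "c279655"
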
    def _has_pending_codex_commit(self, comments: List[Dict], head_sha: str) -> bool:
        """Detect if latest commit was generated by Codex automation and is still pending."""
        if not head_sha:
            return False

        normalized_head = head_sha.lower()

        for comment in comments:
            author_login = self._get_comment_author_login(comment)
            if not author_login or self.CODEX_BOT_IDENTIFIER not in author_login.lower():
                continue

            summary_commit = self._extract_codex_summary_commit(comment.get("body", ""))
            if not summary_commit:
                continue

            if summary_commit == normalized_head or normalized_head.startswith(
                summary_commit
            ):
                return True

        return False

    def process_single_pr_by_number(self, pr_number: int, repository: str) -> bool:
        """Process a specific PR by number and repository"""
        repo_full = self._normalize_repository_name(repository)
        self.logger.info(f"🎯 Processing target PR: {repo_full} #{pr_number}")

        # Check global automation limits
        if not self.safety_manager.can_start_global_run():
            self.logger.warning("🚫 Global automation limit reached - cannot process target PR")
            return False

        try:
            # Check safety limits for this specific PR first
            if not self.safety_manager.try_process_pr(pr_number, repo=repo_full):
                self.logger.warning(f"🚫 Safety limits exceeded for PR {repo_full} #{pr_number}")
                return False

            # Only record global run AFTER confirming we can process the PR
            if not self.wrapper_managed:
                self.safety_manager.record_global_run()
                current_runs = self.safety_manager.get_global_runs()
                self.logger.info(
                    "📊 Recorded global run %s/%s before processing target PR",
                    current_runs,
                    self.safety_manager.global_limit,
                )

            # Process PR with guaranteed cleanup
            try:
                # Get PR details using gh CLI
                result = AutomationUtils.execute_subprocess_with_timeout(
                    ["gh", "pr", "view", str(pr_number), "--repo", repo_full, "--json", "title,headRefName,baseRefName,url,author"],
                    timeout=30
                )
                pr_data = json.loads(result.stdout)

                self.logger.info(f"📝 Found PR: {pr_data['title']}")

                # Post codex instruction comment
                comment_result = self.post_codex_instruction_simple(repo_full, pr_number, pr_data)
                success = comment_result == "posted"

                # Record PR processing attempt with result
                attempt_result = "success" if success else "failure"
                self.safety_manager.record_pr_attempt(
                    pr_number,
                    attempt_result,
                    repo=repo_full,
                    branch=pr_data.get('headRefName'),
                )

                if success:
                    self.logger.info(f"✅ Successfully processed target PR {repo_full} #{pr_number}")
                else:
                    self.logger.error(f"❌ Failed to process target PR {repo_full} #{pr_number}")

                return success

            except subprocess.CalledProcessError as e:
                self.logger.error(f"❌ Failed to get PR details for {repo_full} #{pr_number}: {e.stderr}")
                return False
            except json.JSONDecodeError as e:
                self.logger.error(f"❌ Failed to parse PR data for {repo_full} #{pr_number}: {e}")
                return False
            finally:
                # Always release the processing slot
                self.safety_manager.release_pr_slot(pr_number, repo=repo_full)

        except Exception as e:
            self.logger.error(f"❌ Unexpected error processing target PR {repo_full} #{pr_number}: {e}")
            self.logger.debug("Traceback: %s", traceback.format_exc())
            return False

    def run_monitoring_cycle(self, single_repo=None, max_prs=10):
        """Run a complete monitoring cycle with actionable PR counting"""
        self.logger.info("🚀 Starting jleechanorg PR monitoring cycle")

        if not self.safety_manager.can_start_global_run():
            current_runs = self.safety_manager.get_global_runs()
            self.logger.warning(
                "🚫 Global automation limit reached %s/%s",
                current_runs,
                self.safety_manager.global_limit,
            )
            self.safety_manager.check_and_notify_limits()
            return

        global_run_recorded = self.wrapper_managed

        try:
            open_prs = self.discover_open_prs()
        except Exception as exc:
            self.logger.error("❌ Failed to discover PRs: %s", exc)
            self.logger.debug("Traceback: %s", traceback.format_exc())
            self.safety_manager.check_and_notify_limits()
            return

        # Apply single repo filter if specified
        if single_repo:
            open_prs = [pr for pr in open_prs if pr["repository"] == single_repo]
            self.logger.info(f"🎯 Filtering to repository: {single_repo}")

        if not open_prs:
            self.logger.info("📭 No open PRs found")
            return

        # Use enhanced actionable counting instead of a simple max_prs limit
        target_actionable_count = max_prs  # Convert max_prs to actionable target
        actionable_processed = 0
        skipped_count = 0

        for pr in open_prs:
            if actionable_processed >= target_actionable_count:
                break

            repo_name = pr["repository"]
            repo_full_name = self._normalize_repository_name(
                pr.get("repositoryFullName") or repo_name
            )
            pr_number = pr["number"]

            # Check if this PR is actionable (skip if not)
            if not self.is_pr_actionable(pr):
                skipped_count += 1
                continue

            branch_name = pr.get('headRefName', 'unknown')

            if not self.safety_manager.try_process_pr(pr_number, repo=repo_full_name, branch=branch_name):
                self.logger.info(
                    f"🚫 Safety limits exceeded for PR {repo_full_name} #{pr_number}; skipping"
                )
                skipped_count += 1
                continue

            self.logger.info(f"🎯 Processing PR: {repo_full_name} #{pr_number} - {pr['title']}")

            attempt_recorded = False
            try:
                if not global_run_recorded:
                    self.safety_manager.record_global_run()
                    global_run_recorded = True
                    current_runs = self.safety_manager.get_global_runs()
                    self.logger.info(
                        "📊 Recorded global run %s/%s before processing PRs",
                        current_runs,
                        self.safety_manager.global_limit,
                    )

                # Post codex instruction comment directly (comment-only approach)
                comment_result = self.post_codex_instruction_simple(repo_full_name, pr_number, pr)
                success = comment_result == "posted"

                attempt_result = "success" if success else "failure"
                self.safety_manager.record_pr_attempt(
                    pr_number,
                    attempt_result,
                    repo=repo_full_name,
                    branch=branch_name,
                )
                attempt_recorded = True

                if success:
                    self.logger.info(f"✅ Successfully processed PR {repo_full_name} #{pr_number}")
                    actionable_processed += 1
                else:
                    self.logger.error(f"❌ Failed to process PR {repo_full_name} #{pr_number}")
            except Exception as e:
                self.logger.error(f"❌ Exception processing PR {repo_full_name} #{pr_number}: {e}")
                self.logger.debug("Traceback: %s", traceback.format_exc())
                # Record failure for safety manager
                self.safety_manager.record_pr_attempt(pr_number, "failure", repo=repo_full_name, branch=branch_name)
                attempt_recorded = True
            finally:
                # Always release the processing slot if record_pr_attempt didn't do it
                if not attempt_recorded:
                    self.safety_manager.release_pr_slot(pr_number, repo=repo_full_name, branch=branch_name)

        self.logger.info(f"🏁 Monitoring cycle complete: {actionable_processed} actionable PRs processed, {skipped_count} skipped")


def main():
    """CLI interface for jleechanorg PR monitor"""

    parser = argparse.ArgumentParser(description='jleechanorg PR Monitor')
    parser.add_argument('--dry-run', action='store_true',
                        help='Discover PRs but do not process them')
    parser.add_argument('--single-repo',
                        help='Process only a specific repository')
    parser.add_argument('--max-prs', type=int, default=5,
                        help='Maximum PRs to process per cycle')
    parser.add_argument('--target-pr', type=int,
                        help='Process a specific PR number')
    parser.add_argument('--target-repo',
                        help='Repository for target PR (required with --target-pr)')

    args = parser.parse_args()

    # Validate target PR arguments
    if args.target_pr and not args.target_repo:
        parser.error('--target-repo is required when using --target-pr')
    if args.target_repo and not args.target_pr:
        parser.error('--target-pr is required when using --target-repo')

    monitor = JleechanorgPRMonitor()

    # Handle target PR processing
    if args.target_pr and args.target_repo:
        print(f"🎯 Processing target PR: {args.target_repo} #{args.target_pr}")
        success = monitor.process_single_pr_by_number(args.target_pr, args.target_repo)
        sys.exit(0 if success else 1)

    if args.dry_run:
        print("🔍 DRY RUN: Discovering PRs only")
        prs = monitor.discover_open_prs()

        if args.single_repo:
            prs = [pr for pr in prs if pr["repository"] == args.single_repo]

        print(f"📋 Found {len(prs)} open PRs:")
        for pr in prs[:args.max_prs]:
            print(f"  • {pr['repository']} PR #{pr['number']}: {pr['title']}")
    else:
        monitor.run_monitoring_cycle(single_repo=args.single_repo, max_prs=args.max_prs)


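# Illustrative invocations (the module path comes from this package; the PR number
# and repo name are hypothetical):
#   python -m jleechanorg_pr_automation.jleechanorg_pr_monitor --dry-run
#   python -m jleechanorg_pr_automation.jleechanorg_pr_monitor --target-pr 123 --target-repo my-repo
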
if __name__ == '__main__':
    main()