jleechanorg-pr-automation 0.1.1__py3-none-any.whl → 0.2.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. jleechanorg_pr_automation/STORAGE_STATE_TESTING_PROTOCOL.md +326 -0
  2. jleechanorg_pr_automation/__init__.py +64 -9
  3. jleechanorg_pr_automation/automation_safety_manager.py +306 -95
  4. jleechanorg_pr_automation/automation_safety_wrapper.py +13 -19
  5. jleechanorg_pr_automation/automation_utils.py +87 -65
  6. jleechanorg_pr_automation/check_codex_comment.py +7 -1
  7. jleechanorg_pr_automation/codex_branch_updater.py +21 -9
  8. jleechanorg_pr_automation/codex_config.py +70 -3
  9. jleechanorg_pr_automation/jleechanorg_pr_monitor.py +1954 -234
  10. jleechanorg_pr_automation/logging_utils.py +86 -0
  11. jleechanorg_pr_automation/openai_automation/__init__.py +3 -0
  12. jleechanorg_pr_automation/openai_automation/codex_github_mentions.py +1111 -0
  13. jleechanorg_pr_automation/openai_automation/debug_page_content.py +88 -0
  14. jleechanorg_pr_automation/openai_automation/oracle_cli.py +364 -0
  15. jleechanorg_pr_automation/openai_automation/test_auth_restoration.py +244 -0
  16. jleechanorg_pr_automation/openai_automation/test_codex_comprehensive.py +355 -0
  17. jleechanorg_pr_automation/openai_automation/test_codex_integration.py +254 -0
  18. jleechanorg_pr_automation/orchestrated_pr_runner.py +516 -0
  19. jleechanorg_pr_automation/tests/__init__.py +0 -0
  20. jleechanorg_pr_automation/tests/test_actionable_counting_matrix.py +84 -86
  21. jleechanorg_pr_automation/tests/test_attempt_limit_logic.py +124 -0
  22. jleechanorg_pr_automation/tests/test_automation_marker_functions.py +175 -0
  23. jleechanorg_pr_automation/tests/test_automation_over_running_reproduction.py +9 -11
  24. jleechanorg_pr_automation/tests/test_automation_safety_limits.py +91 -79
  25. jleechanorg_pr_automation/tests/test_automation_safety_manager_comprehensive.py +53 -53
  26. jleechanorg_pr_automation/tests/test_codex_actor_matching.py +1 -1
  27. jleechanorg_pr_automation/tests/test_fixpr_prompt.py +54 -0
  28. jleechanorg_pr_automation/tests/test_fixpr_return_value.py +140 -0
  29. jleechanorg_pr_automation/tests/test_graphql_error_handling.py +26 -26
  30. jleechanorg_pr_automation/tests/test_model_parameter.py +317 -0
  31. jleechanorg_pr_automation/tests/test_orchestrated_pr_runner.py +697 -0
  32. jleechanorg_pr_automation/tests/test_packaging_integration.py +127 -0
  33. jleechanorg_pr_automation/tests/test_pr_filtering_matrix.py +246 -193
  34. jleechanorg_pr_automation/tests/test_pr_monitor_eligibility.py +354 -0
  35. jleechanorg_pr_automation/tests/test_pr_targeting.py +102 -7
  36. jleechanorg_pr_automation/tests/test_version_consistency.py +51 -0
  37. jleechanorg_pr_automation/tests/test_workflow_specific_limits.py +202 -0
  38. jleechanorg_pr_automation/tests/test_workspace_dispatch_missing_dir.py +119 -0
  39. jleechanorg_pr_automation/utils.py +81 -56
  40. jleechanorg_pr_automation-0.2.45.dist-info/METADATA +864 -0
  41. jleechanorg_pr_automation-0.2.45.dist-info/RECORD +45 -0
  42. jleechanorg_pr_automation-0.1.1.dist-info/METADATA +0 -222
  43. jleechanorg_pr_automation-0.1.1.dist-info/RECORD +0 -23
  44. {jleechanorg_pr_automation-0.1.1.dist-info → jleechanorg_pr_automation-0.2.45.dist-info}/WHEEL +0 -0
  45. {jleechanorg_pr_automation-0.1.1.dist-info → jleechanorg_pr_automation-0.2.45.dist-info}/entry_points.txt +0 -0
  46. {jleechanorg_pr_automation-0.1.1.dist-info → jleechanorg_pr_automation-0.2.45.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1111 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ OpenAI Codex GitHub Mentions Automation
4
+
5
+ Connects to existing Chrome browser, logs into OpenAI, finds all "GitHub mention"
6
+ tasks in Codex, and clicks "Update PR" on each one.
7
+
8
+ Uses Chrome DevTools Protocol (CDP) to connect to existing browser instance,
9
+ avoiding detection as automation.
10
+
11
+ Usage:
12
+ # Start Chrome with remote debugging (if not already running):
13
+ ./scripts/openai_automation/start_chrome_debug.sh
14
+
15
+ # Run this script:
16
+ python3 scripts/openai_automation/codex_github_mentions.py
17
+
18
+ # With custom CDP port:
19
+ python3 scripts/openai_automation/codex_github_mentions.py --cdp-port 9222
20
+ """
21
+
22
+ import argparse
23
+ import asyncio
24
+ import json
25
+ import logging
26
+ import sys
27
+ import time
28
+ import traceback
29
+ from datetime import datetime
30
+ from pathlib import Path
31
+ from typing import Dict, List, Optional, Set
32
+ from urllib.parse import urlparse
33
+
34
+ from playwright.async_api import (
35
+ Browser,
36
+ BrowserContext,
37
+ Page,
38
+ Playwright,
39
+ TimeoutError as PlaywrightTimeoutError,
40
+ async_playwright,
41
+ )
42
+
43
+ from ..logging_utils import setup_logging as _setup_logging
44
+
45
+
46
+ # Set up logging delegated to centralized logging_utils
47
def setup_logging():
    """Build the module logger, writing under /tmp via the centralized logging_utils."""
    log_path = Path("/tmp/automate_codex_update") / "codex_automation.log"
    return _setup_logging("codex_automation", log_file=str(log_path))
55
+
56
+
57
+ logger = setup_logging()
58
+
59
+ # Storage state path for persisting authentication.
60
+ # This file contains sensitive session data; enforce restrictive permissions.
61
+ AUTH_STATE_PATH = Path.home() / ".chatgpt_codex_auth_state.json"
62
+
63
+
64
+ def _ensure_auth_state_permissions(path: Path) -> None:
65
+ try:
66
+ path.parent.mkdir(parents=True, exist_ok=True)
67
+ if path.exists():
68
+ path.chmod(0o600)
69
+ except OSError as exc:
70
+ logger.warning(
71
+ "Could not ensure secure permissions on auth state file %s: %s",
72
+ path,
73
+ exc,
74
+ )
75
+
76
+
77
+ class CodexGitHubMentionsAutomation:
78
+ """Automates finding and updating GitHub mention tasks in OpenAI Codex."""
79
+
80
    def __init__(
        self,
        cdp_url: Optional[str] = None,
        headless: bool = False,
        task_limit: Optional[int] = 50,
        user_data_dir: Optional[str] = None,
        debug: bool = False,
        all_tasks: bool = False,
        archive_mode: bool = False,
        archive_limit: int = 5,
        auto_archive: bool = True,
    ):
        """
        Initialize the automation.

        Args:
            cdp_url: Chrome DevTools Protocol WebSocket URL (None = launch new browser)
            headless: Run in headless mode (not recommended - may be detected)
            task_limit: Maximum number of tasks to process (default: 50, None = all GitHub Mention tasks)
            user_data_dir: Chrome profile directory for persistent login (default: ~/.chrome-codex-automation)
            debug: Enable debug mode (screenshots, HTML dump, keep browser open)
            all_tasks: If True, collect every Codex task link, not only "GitHub Mention" titled ones
            archive_mode: If True, archive completed tasks ONLY (skip update phase)
            archive_limit: Maximum number of tasks to archive (default: 5)
            auto_archive: If True, automatically archive after updating (default: True)
        """
        self.cdp_url = cdp_url
        self.headless = headless
        self.task_limit = task_limit
        # Default to a dedicated profile directory so logins persist between runs.
        self.user_data_dir = user_data_dir or str(Path.home() / ".chrome-codex-automation")
        self.debug = debug
        self.all_tasks = all_tasks
        self.archive_mode = archive_mode
        self.archive_limit = archive_limit
        self.auto_archive = auto_archive
        # Playwright handles; populated lazily by setup()/connect helpers.
        self.playwright: Optional[Playwright] = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self.page: Optional[Page] = None
118
+
119
+ async def start_playwright(self) -> Playwright:
120
+ if self.playwright is None:
121
+ self.playwright = await async_playwright().start()
122
+ return self.playwright
123
+
124
    async def connect_to_existing_browser(self) -> bool:
        """Connect to an existing Chrome instance over CDP.

        Populates self.browser/self.context/self.page on success.

        Returns:
            True if attached over CDP, False if the connection failed.
        """
        await self.start_playwright()

        # Default to Chrome's standard local remote-debugging endpoint.
        if not self.cdp_url:
            self.cdp_url = "http://127.0.0.1:9222"

        print(f"🔌 Connecting to existing Chrome at {self.cdp_url}...")
        logger.info(f"Connecting to Chrome at {self.cdp_url}")

        try:
            self.browser = await self.playwright.chromium.connect_over_cdp(self.cdp_url)
            print(f"✅ Connected to Chrome (version: {self.browser.version})")
            logger.info(f"Successfully connected to Chrome (version: {self.browser.version})")

            # Prefer the browser's first existing context (keeps cookies/session).
            contexts = self.browser.contexts
            if contexts:
                self.context = contexts[0]
                print(f"📱 Using existing context with {len(self.context.pages)} page(s)")
            else:
                self.context = await self.browser.new_context()
                print("📱 Created new browser context")

            # Reuse an already-open ChatGPT/Codex tab when one is usable.
            self.page = await self._select_existing_page()
            if self.page:
                print("📄 Reusing existing tab for automation")
            else:
                self.page = await self.context.new_page()
                print("📄 Created new page for automation")
            return True
        except Exception as e:
            print(f"❌ Failed to connect via CDP: {e}")
            logger.warning(f"CDP connection failed: {e}")
            return False
158
+
159
+ async def _select_existing_page(self) -> Optional[Page]:
160
+ """Reuse a ready ChatGPT/Codex tab, preferring Codex over generic chat."""
161
+ if not self.context or not self.context.pages:
162
+ return None
163
+
164
+ async def is_ready(page: Page) -> bool:
165
+ try:
166
+ if page.is_closed():
167
+ return False
168
+ title = await page.title()
169
+ return title.strip().lower() != "just a moment..."
170
+ except Exception:
171
+ return False
172
+
173
+ candidates = [page for page in self.context.pages if not page.is_closed()]
174
+
175
+ for page in candidates:
176
+ try:
177
+ if "chatgpt.com/codex" in (page.url or "") and await is_ready(page):
178
+ return page
179
+ except Exception:
180
+ continue
181
+
182
+ for page in candidates:
183
+ try:
184
+ if "chatgpt.com" in (page.url or "") and await is_ready(page):
185
+ return page
186
+ except Exception:
187
+ continue
188
+
189
+ return None
190
+
191
+ async def _ensure_page(self) -> bool:
192
+ """Ensure there is an active page, creating one if needed."""
193
+ try:
194
+ if self.page and not self.page.is_closed():
195
+ return True
196
+ except Exception as exc:
197
+ logger.debug(
198
+ "Error while checking existing page state; attempting to create a new page: %s",
199
+ exc,
200
+ )
201
+
202
+ if not self.context:
203
+ return False
204
+
205
+ try:
206
+ self.page = await self.context.new_page()
207
+ return True
208
+ except Exception as exc:
209
+ logger.debug("Failed to create new page: %s", exc)
210
+ return False
211
+
212
+ async def setup(self) -> bool:
213
+ """Set up browser connection (connect or launch new)."""
214
+ await self.start_playwright()
215
+
216
+ connected = False
217
+ if self.cdp_url:
218
+ connected = await self.connect_to_existing_browser()
219
+
220
+ if not connected:
221
+ # Check if we have saved authentication state
222
+ storage_state = None
223
+ if AUTH_STATE_PATH.exists():
224
+ _ensure_auth_state_permissions(AUTH_STATE_PATH)
225
+ print(f"📂 Found saved authentication state at {AUTH_STATE_PATH}")
226
+ logger.info(f"Loading authentication state from {AUTH_STATE_PATH}")
227
+ storage_state = str(AUTH_STATE_PATH)
228
+
229
+ # Launch browser (not persistent context - use storage state instead)
230
+ print(f"🚀 Launching Chrome...")
231
+ logger.info(f"Launching Chrome")
232
+
233
+ self.browser = await self.playwright.chromium.launch(
234
+ headless=self.headless,
235
+ )
236
+
237
+ # Create context with storage state if available
238
+ if storage_state:
239
+ self.context = await self.browser.new_context(storage_state=storage_state)
240
+ print("✅ Restored previous authentication state")
241
+ logger.info("Restored authentication state from storage")
242
+ else:
243
+ self.context = await self.browser.new_context()
244
+ print("🆕 Creating new authentication state (will save after login)")
245
+ logger.info("Creating new browser context")
246
+
247
+ # Create page
248
+ self.page = await self.context.new_page()
249
+
250
+ return True
251
+
252
    async def ensure_openai_login(self):
        """Navigate to OpenAI and ensure user is logged in.

        Flow: look for a logged-in marker; if absent, try to restore the saved
        auth state file (cookies + localStorage), then fall back to prompting
        for a manual login when running interactively.

        Returns:
            True when a logged-in session is available (or login status could
            not be determined), False otherwise.
        """
        print("\n🔐 Checking OpenAI login status...")

        if not await self._ensure_page():
            print("❌ Unable to create browser page for login check")
            return False

        try:
            current_url = self.page.url or ""
        except Exception:
            current_url = ""

        # Make sure we are on chatgpt.com before probing for login UI.
        if "chatgpt.com" not in current_url:
            try:
                await self.page.goto("https://chatgpt.com/", wait_until="networkidle")
            except PlaywrightTimeoutError:
                # networkidle can time out on busy pages; settle for DOM ready.
                await self.page.goto("https://chatgpt.com/", wait_until="domcontentloaded")
            await asyncio.sleep(2)

        try:
            # The user/profile button only renders for authenticated sessions.
            await self.page.wait_for_selector(
                'button[aria-label*="User"], [data-testid="profile-button"]',
                timeout=5000,
            )
            print("✅ Already logged in to OpenAI")

            # Save authentication state if not already saved
            if not AUTH_STATE_PATH.exists():
                await self.context.storage_state(path=str(AUTH_STATE_PATH))
                _ensure_auth_state_permissions(AUTH_STATE_PATH)
                print(f"💾 Authentication state saved to {AUTH_STATE_PATH}")
                logger.info(f"Saved authentication state to {AUTH_STATE_PATH}")

            return True
        except PlaywrightTimeoutError:
            # If not logged in, try to restore from auth state file first (even in CDP mode)
            if AUTH_STATE_PATH.exists():
                print(f"🔄 Not logged in. Attempting to restore auth state from {AUTH_STATE_PATH}...")
                try:
                    _ensure_auth_state_permissions(AUTH_STATE_PATH)
                    state_content = AUTH_STATE_PATH.read_text()
                    state_data = json.loads(state_content)

                    cookies = state_data.get("cookies")
                    if isinstance(cookies, list):
                        valid_cookies = []
                        # Required fields for Playwright add_cookies
                        # Must have name, value AND (url OR (domain AND path))
                        required_fields = {"name", "value"}
                        domain_fields = {"domain", "path"}

                        for cookie in cookies:
                            if not isinstance(cookie, dict):
                                logger.warning("Skipping non-dict cookie entry")
                                continue

                            # Check basic fields
                            if not required_fields.issubset(cookie.keys()):
                                cookie_name = cookie.get("name", "<unknown>")
                                logger.warning(
                                    "Skipping malformed cookie '%s' missing required fields %s",
                                    cookie_name,
                                    required_fields,
                                )
                                continue

                            # Check domain/path vs url constraint
                            has_url = "url" in cookie
                            has_domain_and_path = domain_fields.issubset(cookie.keys())

                            if not (has_url or has_domain_and_path):
                                cookie_name = cookie.get("name", "<unknown>")
                                logger.warning(
                                    "Skipping cookie '%s' missing either 'url' or both 'domain' and 'path'",
                                    cookie_name,
                                )
                                continue

                            valid_cookies.append(cookie)

                        if valid_cookies:
                            await self.context.add_cookies(valid_cookies)
                            print("✅ Injected cookies from auth state file")
                            logger.info(
                                "Injected %d cookies from auth state file %s",
                                len(valid_cookies),
                                AUTH_STATE_PATH,
                            )

                            # Restore localStorage from origins
                            origins = state_data.get("origins", [])
                            if origins:
                                try:
                                    current_url = self.page.url
                                    current_parsed = urlparse(current_url)
                                    injected_origins = 0

                                    for origin_data in origins:
                                        origin = origin_data.get("origin")
                                        if not origin:
                                            continue

                                        # Use exact origin matching (scheme + netloc)
                                        origin_parsed = urlparse(origin)
                                        origin_matches = (
                                            current_parsed.scheme == origin_parsed.scheme
                                            and current_parsed.netloc == origin_parsed.netloc
                                        )

                                        if origin_matches:
                                            logger.info(f"Restoring localStorage for origin {origin}")
                                            storage_items = origin_data.get("localStorage", [])
                                            items_injected = 0
                                            if storage_items:
                                                for item in storage_items:
                                                    key = item.get("name")
                                                    value = item.get("value")
                                                    # Allow empty strings as valid values (use None check)
                                                    if key is not None and value is not None:
                                                        # json.dumps makes the key/value safe to embed in JS.
                                                        await self.page.evaluate(
                                                            f"window.localStorage.setItem({json.dumps(key)}, {json.dumps(value)})"
                                                        )
                                                        items_injected += 1
                                            if items_injected > 0:
                                                injected_origins += 1

                                    if injected_origins > 0:
                                        print(f"✅ Injected localStorage for {injected_origins} origin(s)")
                                        logger.info(f"Injected localStorage for {injected_origins} origin(s)")
                                except Exception as storage_err:
                                    # localStorage restore is best-effort; cookies may suffice.
                                    logger.warning(f"Failed to restore localStorage: {storage_err}")
                                    print(f"⚠️ Failed to restore localStorage: {storage_err}")

                            # Refresh page to apply cookies and storage
                            await self.page.reload(wait_until="domcontentloaded")
                            await asyncio.sleep(3)

                            # Check login again
                            try:
                                await self.page.wait_for_selector(
                                    'button[aria-label*="User"], [data-testid="profile-button"]',
                                    timeout=5000,
                                )
                                print("✅ Successfully restored session from auth state")
                                return True
                            except PlaywrightTimeoutError:
                                # Fall through to the manual-login path below.
                                print("⚠️ Session restore failed - cookies might be expired")
                        else:
                            print("⚠️ No valid cookies found in auth state file")
                            logger.warning(
                                "No valid cookies found in auth state file %s; skipping cookie injection",
                                AUTH_STATE_PATH,
                            )
                    else:
                        # Diagnose exactly how the state file is malformed.
                        if "cookies" not in state_data:
                            print("⚠️ No 'cookies' key found in auth state file")
                            logger.warning(
                                "Auth state file %s has no 'cookies' key",
                                AUTH_STATE_PATH,
                            )
                        elif cookies is None:
                            print("⚠️ Cookies are null in auth state file")
                            logger.warning(
                                "Auth state file %s has null 'cookies' value",
                                AUTH_STATE_PATH,
                            )
                        else:
                            print("⚠️ Invalid cookies format in auth state file (expected list)")
                            logger.warning(
                                "Invalid cookies format in auth state file %s: expected list, got %s",
                                AUTH_STATE_PATH,
                                type(cookies).__name__,
                            )

                except Exception as restore_err:
                    logger.exception("Failed to restore auth state from %s", AUTH_STATE_PATH)
                    print(f"⚠️ Failed to restore auth state: {restore_err!r}")

            # Restore failed or no state file: look for a Log in control.
            try:
                await self.page.wait_for_selector(
                    'text="Log in", button:has-text("Log in")',
                    timeout=3000,
                )
                print("⚠️ Not logged in to OpenAI")

                # Check if running in non-interactive mode (cron/CI)
                if not sys.stdin.isatty():
                    print("❌ ERROR: Authentication required but running in non-interactive mode")
                    print(" Solution: Log in manually via Chrome with CDP enabled, then run again")
                    print(f" The script will save auth state to {AUTH_STATE_PATH}")
                    return False

                print("\n🚨 MANUAL ACTION REQUIRED:")
                print(" 1. Log in to OpenAI in the browser window")
                print(" 2. Wait for login to complete")
                print(" 3. Press Enter here to continue...")
                input()

                print("🔄 Re-checking OpenAI login status after manual login...")
                try:
                    await self.page.wait_for_selector(
                        'button[aria-label*="User"], [data-testid="profile-button"]',
                        timeout=5000,
                    )
                    # Persist the fresh session for future non-interactive runs.
                    await self.context.storage_state(path=str(AUTH_STATE_PATH))
                    _ensure_auth_state_permissions(AUTH_STATE_PATH)
                    print(f"💾 New authentication state saved to {AUTH_STATE_PATH}")
                    logger.info(f"Saved new authentication state after manual login to {AUTH_STATE_PATH}")
                    return True
                except PlaywrightTimeoutError:
                    print("❌ Still not logged in to OpenAI after manual login step")
                    return False

            except PlaywrightTimeoutError:
                # Neither logged-in marker nor Log in button: assume OK.
                print("⚠️ Could not determine login status")
                print(" Assuming you're logged in and continuing...")
                return True
            except Exception as login_error:
                print(f"⚠️ Unexpected login detection error: {login_error}")
                return False
        except Exception as user_menu_error:
            print(f"⚠️ Unexpected login check error: {user_menu_error}")
            return False
476
+
477
+ async def navigate_to_codex(self):
478
+ """Navigate to OpenAI Codex tasks page."""
479
+ print("\n📍 Navigating to Codex...")
480
+ logger.info("Navigating to Codex...")
481
+
482
+ codex_url = "https://chatgpt.com/codex"
483
+
484
+ if not await self._ensure_page():
485
+ raise RuntimeError("No active browser page available for Codex navigation")
486
+
487
+ await self.page.goto(codex_url, wait_until="domcontentloaded", timeout=30000)
488
+
489
+ # Wait for Cloudflare challenge to complete
490
+ print(" Waiting for Cloudflare challenge (if any)...")
491
+ max_wait = 90 # 90 seconds max wait
492
+ waited = 0
493
+ while waited < max_wait:
494
+ title = await self.page.title()
495
+ if title != "Just a moment...":
496
+ break
497
+ await asyncio.sleep(2)
498
+ waited += 2
499
+ if waited % 10 == 0:
500
+ print(f" Still waiting... ({waited}s)")
501
+
502
+ # Extra wait for dynamic content to load after Cloudflare
503
+ await asyncio.sleep(5)
504
+
505
+ final_title = await self.page.title()
506
+ print(f"✅ Navigated to {codex_url} (title: {final_title})")
507
+ logger.info(f"Successfully navigated to {codex_url} (title: {final_title})")
508
+
509
    async def find_github_mention_tasks(self) -> List[Dict[str, str]]:
        """
        Find task links in Codex.

        By default, filters for "GitHub Mention" tasks and applies task_limit.
        If all_tasks is True, collects the first N Codex tasks regardless of title.

        Returns:
            Deduplicated list of {"href": ..., "text": ...} descriptors,
            truncated to task_limit when set; [] on error or no matches.
        """
        if self.task_limit == 0:
            print("⚠️ Task limit set to 0 - skipping")
            return []

        try:
            print(" Waiting for content to load...")
            await asyncio.sleep(5)

            # Selector fallbacks, most specific first; broad link selector last.
            primary_selector = 'a[href*="/codex/tasks/"]'
            filtered_selector = f'{primary_selector}:has-text("GitHub Mention:")'
            selector_candidates = [primary_selector] if self.all_tasks else [
                filtered_selector,
                'a:has-text("GitHub Mention:")',
                primary_selector,
            ]

            if self.debug:
                # Dump a screenshot + HTML snapshot to aid selector debugging.
                debug_dir = Path("/tmp/automate_codex_update")
                debug_dir.mkdir(parents=True, exist_ok=True)

                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                screenshot_path = debug_dir / f"debug_screenshot_{timestamp}.png"
                html_path = debug_dir / f"debug_html_{timestamp}.html"

                await self.page.screenshot(path=str(screenshot_path))
                html_content = await self.page.content()
                html_path.write_text(html_content)

                print(f"🐛 Debug: Screenshot saved to {screenshot_path}")
                print(f"🐛 Debug: HTML saved to {html_path}")
                print(f"🐛 Debug: Current URL: {self.page.url}")
                print(f"🐛 Debug: Page title: {await self.page.title()}")

            # Per-tab limit mirrors the overall limit (None = unbounded).
            per_tab_limit = None if self.task_limit is None else self.task_limit
            tasks = await self._collect_task_links(selector_candidates, per_tab_limit, tab_label="Tasks")

            # Also scan the "Code reviews" tab when it can be opened.
            if await self._switch_to_tab("Code reviews"):
                code_review_tasks = await self._collect_task_links(
                    selector_candidates,
                    per_tab_limit,
                    tab_label="Code reviews",
                )
                tasks.extend(code_review_tasks)

            if not tasks:
                print("⚠️ Still no tasks found")
                return []

            # De-duplicate by href, preserving first-seen order across tabs.
            deduped: List[Dict[str, str]] = []
            seen: Set[str] = set()
            for task in tasks:
                href = task.get("href", "")
                if not href or href in seen:
                    continue
                seen.add(href)
                deduped.append(task)

            # Apply the overall limit after merging both tabs.
            if self.task_limit is not None:
                deduped = deduped[: self.task_limit]

            print(f"✅ Prepared {len(deduped)} task link(s) for processing")
            logger.info(f"Prepared {len(deduped)} task link(s) across tabs")
            return deduped

        except Exception as e:
            print(f"❌ Error finding tasks: {e}")
            logger.error(f"Error finding tasks: {e}")
            return []
584
+
585
+ async def _collect_task_links(
586
+ self,
587
+ selector_candidates: List[str],
588
+ limit: Optional[int],
589
+ tab_label: str,
590
+ ) -> List[Dict[str, str]]:
591
+ locator_selector = selector_candidates[0]
592
+ locator = self.page.locator(locator_selector)
593
+ task_count = await locator.count()
594
+ if task_count == 0 and len(selector_candidates) > 1:
595
+ for candidate in selector_candidates[1:]:
596
+ locator = self.page.locator(candidate)
597
+ task_count = await locator.count()
598
+ if task_count > 0:
599
+ locator_selector = candidate
600
+ break
601
+
602
+ print(f"\n🔍 Searching for tasks in {tab_label} using selector: {locator_selector}")
603
+
604
+ if task_count == 0:
605
+ print("⚠️ No tasks found, retrying after short wait...")
606
+ await asyncio.sleep(5)
607
+ task_count = await locator.count()
608
+ if task_count == 0:
609
+ return []
610
+
611
+ local_limit = task_count if limit is None else min(task_count, limit)
612
+ tasks: List[Dict[str, str]] = []
613
+ for idx in range(local_limit):
614
+ item = locator.nth(idx)
615
+ href = await item.get_attribute("href") or ""
616
+ text = (await item.text_content()) or ""
617
+ tasks.append({"href": href, "text": text})
618
+ return tasks
619
+
620
+ async def _switch_to_tab(self, label: str) -> bool:
621
+ selectors = [
622
+ f'button:has-text("{label}")',
623
+ f'a:has-text("{label}")',
624
+ f'[role="tab"]:has-text("{label}")',
625
+ ]
626
+ for selector in selectors:
627
+ locator = self.page.locator(selector)
628
+ try:
629
+ if await locator.count() > 0:
630
+ await locator.first.click()
631
+ await asyncio.sleep(2)
632
+ return True
633
+ except Exception as exc:
634
+ logger.debug("Failed to switch to tab %s with %s: %s", label, selector, exc)
635
+ continue
636
+ logger.debug("Unable to switch to tab %s using selectors %s", label, selectors)
637
+ return False
638
+
639
    async def update_pr_for_task(self, task_link: Dict[str, str]):
        """
        Open task and click 'Update branch' button to update the PR.

        Args:
            task_link: Mapping containing href and text preview for the task

        Returns:
            True once the button was clicked, False if the button was missing
            or the task page could not be processed.
        """
        href = task_link.get("href", "")
        task_text_raw = task_link.get("text", "")
        task_text = (task_text_raw or "").strip()[:80] or "(no text)"

        # Task-list hrefs are usually relative; absolutize them.
        target_url = href if href.startswith("http") else f"https://chatgpt.com{href}"

        # Two attempts: the page can be closed out from under us mid-run.
        for attempt in range(2):
            if not await self._ensure_page():
                print(" ❌ No active browser page available to update task")
                if attempt == 0:
                    continue
                return False

            try:
                print(f" Navigating to task: {task_text}")
                await self.page.goto(target_url, wait_until="domcontentloaded", timeout=30000)
                await asyncio.sleep(3)

                update_branch_locator = self.page.locator('button:has-text("Update branch")')

                if await update_branch_locator.count() > 0:
                    await update_branch_locator.first.click()
                    print(" ✅ Clicked 'Update branch' button")
                    await asyncio.sleep(2)
                else:
                    print(" ⚠️ 'Update branch' button not found")
                    return False

            except Exception as e:
                error_text = str(e)
                print(f" ❌ Failed to update PR: {e}")
                # A closed page on the first attempt is recoverable: retry once.
                if "Target page, context or browser has been closed" in error_text and attempt == 0:
                    print(" 🔄 Page was closed; reopening a new tab and retrying...")
                    continue
                return False

            # Success path: return to the task list for the next item; a
            # navigation failure here is logged but does not fail the update.
            try:
                await self.page.goto("https://chatgpt.com/codex", wait_until="domcontentloaded", timeout=30000)
                await asyncio.sleep(3)
            except Exception as nav_err:
                print(f" ⚠️ Failed to navigate back to Codex after update: {nav_err}")
            return True
688
+
689
    async def archive_completed_task(self, task_link: Dict[str, str]) -> Optional[str]:
        """
        Archive a task that shows 'View PR' instead of 'Update branch'.

        Args:
            task_link: Mapping containing href and text preview for the task

        Returns:
            The task URL if archived successfully, None otherwise
        """
        href = task_link.get("href", "")
        task_text_raw = task_link.get("text", "")
        task_text = (task_text_raw or "").strip()[:80] or "(no text)"

        # Task-list hrefs are usually relative; absolutize them.
        target_url = href if href.startswith("http") else f"https://chatgpt.com{href}"

        # Two attempts: the page can be closed out from under us mid-run.
        for attempt in range(2):
            if not await self._ensure_page():
                print(" ❌ No active browser page available to archive task")
                if attempt == 0:
                    continue
                return None

            try:
                print(f" Navigating to task: {task_text}")
                await self.page.goto(target_url, wait_until="domcontentloaded", timeout=30000)
                await asyncio.sleep(3)

                # Check if this task has "View PR" (completed) instead of "Update branch"
                view_pr_locator = self.page.locator('a:has-text("View PR"), button:has-text("View PR")')
                update_branch_locator = self.page.locator('button:has-text("Update branch")')

                if await update_branch_locator.count() > 0:
                    print(" ⏭️ Task has 'Update branch' - not completed, skipping archive")
                    return None

                if await view_pr_locator.count() == 0:
                    print(" ⚠️ Neither 'View PR' nor 'Update branch' found - skipping")
                    return None

                print(" 📋 Task shows 'View PR' - eligible for archive")

                # Look for archive button - try multiple selectors
                archive_selectors = [
                    'button:has-text("Archive")',
                    'button[aria-label*="archive" i]',
                    '[data-testid="archive-button"]',
                    'button:has-text("Mark as done")',
                    'button:has-text("Complete")',
                ]

                archived = False
                for selector in archive_selectors:
                    archive_locator = self.page.locator(selector)
                    if await archive_locator.count() > 0:
                        await archive_locator.first.click()
                        print(f" ✅ Clicked archive button (selector: {selector})")
                        archived = True
                        await asyncio.sleep(2)
                        break

                if not archived:
                    # Try clicking a menu/kebab button first to reveal archive option
                    menu_selectors = [
                        'button[aria-label*="menu" i]',
                        'button[aria-label*="more" i]',
                        '[data-testid="task-menu"]',
                        'button:has-text("⋮")',
                        'button:has-text("...")',
                    ]
                    for menu_sel in menu_selectors:
                        menu_locator = self.page.locator(menu_sel)
                        if await menu_locator.count() > 0:
                            await menu_locator.first.click()
                            await asyncio.sleep(1)
                            # Now try archive selectors again
                            for selector in archive_selectors:
                                archive_locator = self.page.locator(selector)
                                if await archive_locator.count() > 0:
                                    await archive_locator.first.click()
                                    print(f" ✅ Clicked archive from menu (selector: {selector})")
                                    archived = True
                                    await asyncio.sleep(2)
                                    break
                            if archived:
                                break

                if not archived:
                    print(" ⚠️ Could not find archive button")
                    return None

                return target_url

            except Exception as e:
                error_text = str(e)
                print(f" ❌ Failed to archive task: {e}")
                # A closed page on the first attempt is recoverable: retry once.
                if "Target page, context or browser has been closed" in error_text and attempt == 0:
                    print(" 🔄 Page was closed; reopening a new tab and retrying...")
                    continue
                return None

            finally:
                # Runs on every exit from the try (return, continue, or fall
                # through): always go back to the task list for the next task.
                try:
                    await self.page.goto("https://chatgpt.com/codex", wait_until="domcontentloaded", timeout=30000)
                    await asyncio.sleep(3)
                except Exception as nav_err:
                    print(f" ⚠️ Failed to navigate back to Codex after archive: {nav_err}")

        return None
798
+
799
+ async def archive_completed_github_mentions(self, limit: int = 5) -> List[str]:
800
+ """
801
+ Find GitHub mention tasks with 'View PR' and archive them.
802
+
803
+ Args:
804
+ limit: Maximum number of tasks to archive
805
+
806
+ Returns:
807
+ List of archived task URLs
808
+ """
809
+ tasks = await self.find_github_mention_tasks()
810
+
811
+ if not tasks:
812
+ print("\n🎯 No GitHub mention tasks to check for archiving")
813
+ logger.info("No tasks found for archiving")
814
+ return []
815
+
816
+ print(f"\n🗄️ Checking {len(tasks)} task(s) for archiving (limit: {limit})...")
817
+ archived_urls: List[str] = []
818
+
819
+ for i, task in enumerate(tasks, 1):
820
+ if len(archived_urls) >= limit:
821
+ print(f"\n✅ Reached archive limit of {limit}")
822
+ break
823
+
824
+ print(f"\n📝 Task {i}/{len(tasks)}:")
825
+
826
+ try:
827
+ raw_text = task.get("text", "") if isinstance(task, dict) else ""
828
+ task_text = (raw_text or "").strip()
829
+ preview = task_text[:100] + "..." if len(task_text) > 100 else (task_text or "(no text)")
830
+ print(f" {preview}")
831
+ except Exception as text_error:
832
+ print(f" (Could not extract task text: {text_error})")
833
+
834
+ url = await self.archive_completed_task(task)
835
+ if url:
836
+ archived_urls.append(url)
837
+
838
+ await asyncio.sleep(1)
839
+
840
+ print(f"\n✅ Archived {len(archived_urls)}/{len(tasks)} task(s)")
841
+ logger.info(f"Archived {len(archived_urls)} tasks")
842
+ return archived_urls
843
+
844
+ async def process_all_github_mentions(self):
845
+ """Find all GitHub mention tasks and update their PRs."""
846
+ tasks = await self.find_github_mention_tasks()
847
+
848
+ if not tasks:
849
+ print("\n🎯 No GitHub mention tasks to process")
850
+ logger.info("No tasks found to process")
851
+ return 0
852
+
853
+ print(f"\n🎯 Processing {len(tasks)} task(s)...")
854
+ success_count = 0
855
+
856
+ for i, task in enumerate(tasks, 1):
857
+ print(f"\n📝 Task {i}/{len(tasks)}:")
858
+
859
+ try:
860
+ raw_text = task.get("text", "") if isinstance(task, dict) else ""
861
+ task_text = (raw_text or "").strip()
862
+ preview = task_text[:100] + "..." if len(task_text) > 100 else (task_text or "(no text)")
863
+ print(f" {preview}")
864
+ except Exception as text_error:
865
+ print(f" (Could not extract task text: {text_error})")
866
+
867
+ if await self.update_pr_for_task(task):
868
+ success_count += 1
869
+
870
+ await asyncio.sleep(1)
871
+
872
+ print(f"\n✅ Successfully updated {success_count}/{len(tasks)} task(s)")
873
+ logger.info(f"Successfully updated {success_count}/{len(tasks)} tasks")
874
+ return success_count
875
+
876
    async def run(self) -> bool:
        """Main automation workflow.

        Orchestrates the full pass: browser setup, OpenAI login verification,
        navigation to Codex, then either archive-only mode (``archive_mode``)
        or update-then-optional-auto-archive. Browser teardown happens in the
        ``finally`` block regardless of outcome.

        Returns:
            True when the selected phase(s) completed, False on login
            failure, user interruption, or an unexpected exception.
        """
        print("🤖 OpenAI Codex GitHub Mentions Automation")
        print("=" * 60)
        logger.info("Starting Codex automation workflow")

        try:
            # Step 1: Setup browser (connect via CDP or launch a fresh one).
            await self.setup()

            # Step 2: Ensure logged in to OpenAI (will save auth state on first login).
            logged_in = await self.ensure_openai_login()
            if not logged_in:
                print("\n❌ Failed to verify OpenAI login; aborting automation.")
                logger.error("Failed to verify OpenAI login; aborting automation.")
                return False

            # Step 3: Navigate to Codex if not already on the tasks list.
            # A task *detail* URL does not count as "already there" — we need
            # the list view to enumerate tasks.
            current_url = self.page.url
            if "chatgpt.com/codex" in current_url and not self._is_task_detail_url(current_url):
                print(f"\n✅ Already on Codex page: {current_url}")
                logger.info(f"Already on Codex page: {current_url}")
                await asyncio.sleep(3)
            else:
                await self.navigate_to_codex()

            # Step 4: Process tasks.
            # archive_mode=True means archive ONLY (--archive flag).
            # Otherwise: update first, then archive if auto_archive is enabled.
            if self.archive_mode:
                # Archive-only mode (backward compatible with --archive flag).
                archived_urls = await self.archive_completed_github_mentions(limit=self.archive_limit)
                print("\n" + "=" * 60)
                print(f"✅ Archive complete! Archived {len(archived_urls)} task(s)")
                if archived_urls:
                    print("\n📋 Archived task URLs:")
                    for url in archived_urls:
                        print(f" - {url}")
                logger.info(f"Archive completed - archived {len(archived_urls)} task(s)")
                return True
            else:
                # Step 4a: Update branches for all GitHub-mention tasks.
                count = await self.process_all_github_mentions()
                print("\n" + "=" * 60)
                print(f"✅ Update complete! Processed {count} task(s)")
                logger.info(f"Update completed - processed {count} task(s)")

                # Step 4b: Archive completed tasks (if auto_archive enabled).
                if self.auto_archive:
                    print("\n🗄️ Auto-archiving completed tasks...")
                    # Re-navigate to Codex to get a fresh task list after updates.
                    await self.navigate_to_codex()
                    archived_urls = await self.archive_completed_github_mentions(limit=self.archive_limit)
                    print(f"✅ Archived {len(archived_urls)} completed task(s)")
                    if archived_urls:
                        print("\n📋 Archived task URLs:")
                        for url in archived_urls:
                            print(f" - {url}")
                    logger.info(f"Auto-archive completed - archived {len(archived_urls)} task(s)")

                print("\n" + "=" * 60)
                if self.auto_archive:
                    print("✅ Automation complete! Update and auto-archive phases finished.")
                    logger.info("Automation completed successfully: update and auto-archive phases finished")
                else:
                    print("✅ Automation complete! Update phase finished (auto-archive disabled).")
                    logger.info("Automation completed successfully: update phase finished (auto-archive disabled)")
                return True

        except KeyboardInterrupt:
            print("\n⚠️ Automation interrupted by user")
            logger.warning("Automation interrupted by user")
            return False

        except Exception as e:
            print(f"\n❌ Automation failed: {e}")
            logger.error(f"Automation failed: {e}")
            traceback.print_exc()
            return False

        finally:
            # Close context or browser depending on how it was created.
            # Debug mode keeps the browser open for manual inspection; the
            # sleep is interruptible with Ctrl+C.
            if self.debug:
                print("\n🐛 Debug mode: Keeping browser open for inspection")
                print(" Press Ctrl+C to exit when done inspecting")
                try:
                    await asyncio.sleep(3600)  # Wait 1 hour for inspection
                except KeyboardInterrupt:
                    print("\n🐛 Debug inspection complete")

            if not self.cdp_url and not self.debug:
                # Close both context and browser (we launched them both).
                print("\n🔒 Closing browser (launched by automation)")
                if self.context:
                    await self.context.close()
                if self.browser:
                    await self.browser.close()
            else:
                # CDP-attached or debug browsers belong to the user — leave them.
                print("\n💡 Browser left open (CDP mode or debug mode)")

            await self.cleanup()
+ async def cleanup(self):
979
+ """Clean up Playwright client resources."""
980
+ if self.playwright:
981
+ await self.playwright.stop()
982
+ self.playwright = None
983
+
984
+ def _is_task_detail_url(self, url: str) -> bool:
985
+ return "/codex/tasks/" in url and "task_" in url
986
+
987
+
988
+ def _format_cdp_host_for_url(host: str) -> str:
989
+ if ":" in host and not (host.startswith("[") and host.endswith("]")):
990
+ return f"[{host}]"
991
+ return host
992
+
993
+
994
async def main():
    """CLI entry point.

    Parses command-line flags, constructs a ``CodexGitHubMentionsAutomation``
    instance, and runs the workflow. Exits the process with status 0 on
    success and 1 on failure; ``cleanup()`` is always awaited before exit.
    """
    parser = argparse.ArgumentParser(
        description="Automate OpenAI Codex GitHub mention tasks",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Default (connects to Chrome on port 9222)
  python3 %(prog)s

  # Custom CDP port
  python3 %(prog)s --cdp-port 9223

  # Verbose mode
  python3 %(prog)s --verbose
"""
    )

    parser.add_argument(
        "--cdp-port",
        type=int,
        default=9222,
        help="Chrome DevTools Protocol port (default: 9222)"
    )

    parser.add_argument(
        "--use-existing-browser",
        action="store_true",
        help="Connect to existing Chrome (requires start_chrome_debug.sh)"
    )

    parser.add_argument(
        "--cdp-host",
        default="127.0.0.1",
        help="CDP host (default: 127.0.0.1)"
    )

    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging"
    )

    parser.add_argument(
        "--limit",
        type=int,
        default=50,
        help="Maximum number of tasks to process (default: 50)"
    )

    parser.add_argument(
        "--all-tasks",
        action="store_true",
        help="Process all Codex tasks (not just GitHub Mention tasks)",
    )

    parser.add_argument(
        "--profile-dir",
        help="Chrome profile directory for persistent login (default: ~/.chrome-codex-automation)"
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        help="Debug mode: take screenshots, save HTML, keep browser open"
    )

    parser.add_argument(
        "--archive",
        action="store_true",
        help="Archive completed tasks (tasks showing 'View PR' instead of 'Update branch')"
    )

    parser.add_argument(
        "--archive-limit",
        type=int,
        default=5,
        help="Maximum number of tasks to archive (default: 5)"
    )

    parser.add_argument(
        "--no-auto-archive",
        action="store_true",
        help="Disable automatic archiving after update (by default, update + archive run together)"
    )

    args = parser.parse_args()

    # --verbose promotes both the logger and its handlers to DEBUG; both are
    # needed because handlers filter independently of the logger level.
    if args.verbose:
        logger.setLevel(logging.DEBUG)
        for handler in logger.handlers:
            handler.setLevel(logging.DEBUG)

    # Build CDP URL only if using existing browser (IPv6 hosts get bracketed).
    cdp_host = _format_cdp_host_for_url(args.cdp_host)
    cdp_url = f"http://{cdp_host}:{args.cdp_port}" if args.use_existing_browser else None

    # Run automation. Note auto_archive is the inverse of --no-auto-archive:
    # update + archive run together by default.
    automation = CodexGitHubMentionsAutomation(
        cdp_url=cdp_url,
        task_limit=args.limit,
        user_data_dir=args.profile_dir,
        debug=args.debug,
        all_tasks=args.all_tasks,
        archive_mode=args.archive,
        archive_limit=args.archive_limit,
        auto_archive=not args.no_auto_archive,
    )

    try:
        success = await automation.run()
        sys.exit(0 if success else 1)
    finally:
        # run() also calls cleanup() in its own finally; this second call is a
        # safety net and is harmless because cleanup() clears its handle.
        await automation.cleanup()
# Script entry point: run the async CLI under a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())