@m13v/s4l 1.6.197-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1336 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +513 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,2804 @@
1
+ #!/usr/bin/env python3
2
+ """Twitter/X browser automation functions for Social Autoposter.
3
+
4
+ Replaces multi-step Claude browser MCP calls with single Python function calls.
5
+ Each function does all browser work internally and returns structured JSON.
6
+
7
+ Usage:
8
+ # Reply to a tweet (auto-likes the parent tweet after the reply lands)
9
+ python3 twitter_browser.py reply "https://x.com/user/status/123" "reply text"
10
+
11
+ # Like a tweet (standalone; same like the reply path fires automatically)
12
+ python3 twitter_browser.py like "https://x.com/user/status/123"
13
+
14
+ # Scan DM inbox for unread conversations
15
+ python3 twitter_browser.py unread-dms
16
+
17
+ # Read messages from a DM conversation
18
+ python3 twitter_browser.py read-conversation "https://x.com/i/chat/123-456"
19
+
20
+ # Send a DM message
21
+ python3 twitter_browser.py send-dm "https://x.com/i/chat/123-456" "message text"
22
+
23
+ Requires: pip install playwright && playwright install chromium
24
+
25
+ Connects to the running twitter-harness MCP browser via CDP (Chrome DevTools
26
+ Protocol, http://127.0.0.1:9555 by default; override via TWITTER_CDP_URL env
27
+ var set by skill/lib/twitter-backend.sh) to reuse the existing logged-in
28
+ session on the browser-harness profile.
29
+ """
30
+
31
+ import atexit
32
+ import json
33
+ import os
34
+ import random
35
+ import re
36
+ import signal
37
+ import subprocess
38
+ import sys
39
+ import time
40
+
41
+
42
# JSON lock file shared by every process that drives the Twitter browser.
LOCK_FILE = os.path.expanduser("~/.claude/twitter-browser-lock.json")

# Process-level mutex TTL in seconds; holders refresh it during long operations.
LOCK_EXPIRY = 300

# Silence ceiling (seconds) that applies ONLY to a role:"post" holder. It is
# deliberately a separate knob from LOCK_EXPIRY: a poster refreshes the lock at
# every candidate boundary, so only a genuinely hung poster stays silent this
# long, while a role:"scan" holder keeps the LOCK_EXPIRY TTL. The value has to
# exceed the worst-case single candidate step (one reply plus the link_tail AI
# call) and stay small enough that a hung poster cannot block the browser for
# long. Tuning one TTL must never move the other.
POST_LOCK_EXPIRY = 180

# Seconds to wait for the lock to free up before giving up.
LOCK_WAIT_MAX = 45

# Seconds between lock polls.
LOCK_POLL_INTERVAL = 2

# Seconds to wait for a preempted scan holder to die before sending SIGKILL.
PREEMPT_KILL_WAIT = 5

# Lock role priority. A "post" holder is user-initiated (an approved reply) and
# outranks any "scan" holder (the scan/draft cycle, autopilot or plugin). A
# poster that finds a LIVE lower-priority holder preempts it (SIGTERM, then
# reclaim) rather than waiting LOCK_WAIT_MAX and giving up, which is what makes
# "posting beats scanning" hold across processes. The role defaults to "scan"
# so any unmarked browser op is preemptable; only the poster path exports
# S4L_LOCK_ROLE=post. An unset, empty or whitespace-only value means "scan".
LOCK_ROLE = os.environ.get("S4L_LOCK_ROLE", "").strip() or "scan"

# Browser viewport size in pixels.
VIEWPORT = dict(width=911, height=1016)
70
+
71
+ # Posting handle. Resolved at call time from AUTOPOSTER_TWITTER_HANDLE env
72
+ # var (set by per-account launchd/systemd units) or config.json
73
+ # accounts.twitter.handle. Returns None when neither source is set.
74
+ #
75
+ # There is intentionally NO hardcoded fallback handle. The old "m13v_"
76
+ # default meant any install with an unset handle silently posted under the
77
+ # repo owner's identity: it stamped posts.our_account = m13v_ and built reply
78
+ # permalinks as x.com/m13v_/status/<id> for tweets that actually belonged to a
79
+ # different account, corrupting attribution in the shared DB. Callers that
80
+ # build a URL or post under this identity MUST treat None as "account not
81
+ # configured" and refuse, rather than impersonate someone.
82
def our_handle():
    """Return the configured Twitter/X posting handle, or None.

    Resolution is delegated to ``account_resolver.resolve("twitter")``, which
    is imported from the directory that contains this file.

    Returns:
        Whatever ``account_resolver.resolve("twitter")`` returns, or None when
        the resolver cannot be imported or raises. Callers must treat None as
        "account not configured" and refuse to post or build URLs.
    """
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        # Register the scripts directory only once. An unconditional insert
        # here would prepend a duplicate sys.path entry on every call.
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)
        import account_resolver
        return account_resolver.resolve("twitter")
    except Exception:
        # Deliberately best-effort: any resolver failure means "no handle".
        return None
89
+
90
+ # DM encryption passcode from .env
91
# The TWITTER_DM_PASSCODE environment variable wins; otherwise fall back to the
# first TWITTER_DM_PASSCODE= line of the .env file one directory above this
# script. The passcode stays "" when neither source provides a value.
DM_PASSCODE = os.environ.get("TWITTER_DM_PASSCODE", "")
if not DM_PASSCODE:
    # abspath keeps the lookup independent of the current working directory
    # when __file__ is a relative path.
    env_path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".env"
    )
    try:
        with open(env_path) as f:
            for line in f:
                if line.startswith("TWITTER_DM_PASSCODE="):
                    _passcode = line.strip().split("=", 1)[1]
                    # dotenv files may quote values; drop one matching pair of
                    # surrounding quotes so they are not part of the passcode.
                    if (
                        len(_passcode) >= 2
                        and _passcode[0] == _passcode[-1]
                        and _passcode[0] in "\"'"
                    ):
                        _passcode = _passcode[1:-1]
                    DM_PASSCODE = _passcode
                    break
    except OSError:
        # A missing or unreadable .env is not an error: the passcode is
        # optional and simply stays empty.
        pass
100
+
101
+
102
def _load_active_twitter_campaigns():
    """Best-effort loader for active Twitter campaigns with literal suffixes.

    Queries ``/api/v1/campaigns`` through ``scripts/http_api.py`` with
    ``status=active``, ``platform=twitter``, ``has_suffix=true``,
    ``with_budget_remaining=true`` and ``limit=50`` (migrated 2026-05-18 from a
    direct psycopg2 SELECT; the same WHERE clause now runs server-side).
    Mirrors the ``_load_active_reddit_campaigns_for_dm`` helper in
    reddit_browser.py.

    Returns:
        A list of ``(id, suffix, sample_rate)`` tuples. Rows without a suffix
        are skipped, and a missing or non-numeric ``sample_rate`` becomes 1.0.
        Any failure (no API, no creds, network glitch) is logged to stderr and
        yields ``[]`` so this module stays usable in non-DB contexts.
    """
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        # Register the scripts directory only once so repeated calls do not
        # keep prepending duplicate sys.path entries.
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)
        from http_api import api_get
        resp = api_get(
            "/api/v1/campaigns",
            query={
                "status": "active",
                "platform": "twitter",
                "has_suffix": "true",
                "with_budget_remaining": "true",
                "limit": 50,
            },
        )
        rows = (resp.get("data") or {}).get("campaigns") or []
        campaigns = []
        for row in rows:
            suffix = row.get("suffix")
            if not suffix:
                continue
            raw_rate = row.get("sample_rate")
            try:
                sample_rate = float(raw_rate if raw_rate is not None else 1.0)
            except (TypeError, ValueError):
                sample_rate = 1.0
            campaigns.append((row.get("id"), suffix, sample_rate))
        return campaigns
    except Exception as e:
        print(f"[twitter_browser] _load_active_twitter_campaigns failed: {e}",
              file=sys.stderr)
        return []
144
+
145
+
146
def _log_twitter_dm_outbound(dm_id, content, minted_codes=None):
    """Record a verified outbound DM via ``dm_conversation.py log-outbound``.

    Logging through that script lets the suffix-detection path attribute the
    message to the active campaign and advance its counter. Best-effort: a
    failure is reported on stderr and never raised.

    Args:
        dm_id: Identifier of the DM row; a falsy value skips logging.
        content: Message text that was sent.
        minted_codes: Optional iterable of dm_links codes minted for the URLs
            in this message. They are passed comma-joined in the
            ``WRAP_MINTED_CODES`` environment variable so log-outbound can
            backfill ``dm_links.message_id``.

    Returns:
        True only when the helper script ran and exited with status 0; False
        when ``dm_id`` is falsy, the script could not be run, it timed out, or
        it exited non-zero.
    """
    if not dm_id:
        return False
    try:
        env = os.environ.copy()
        if minted_codes:
            # str() keeps the join safe if a code arrives as a non-string.
            env["WRAP_MINTED_CODES"] = ",".join(str(c) for c in minted_codes)
        script = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "dm_conversation.py"
        )
        proc = subprocess.run(
            ["python3", script,
             "log-outbound", "--dm-id", str(dm_id),
             "--content", content, "--verified"],
            capture_output=True, text=True, timeout=20, env=env,
        )
    except Exception as e:
        print(f"[twitter_browser] internal log-outbound failed: {e}",
              file=sys.stderr)
        return False
    if proc.returncode != 0:
        # A non-zero exit means the message was NOT logged; surface the tail of
        # the script's output instead of silently reporting success.
        detail = (proc.stderr or proc.stdout or "").strip()[-500:]
        print(f"[twitter_browser] internal log-outbound exited "
              f"{proc.returncode}: {detail}",
              file=sys.stderr)
        return False
    return True
171
+
172
+
173
def _is_twitter_url(url):
    """Return True when *url*'s hostname is x.com / twitter.com or a subdomain."""
    from urllib.parse import urlparse

    if not isinstance(url, str):
        return False
    try:
        host = (urlparse(url).hostname or "").lower()
    except ValueError:
        return False
    return any(
        host == domain or host.endswith("." + domain)
        for domain in ("x.com", "twitter.com")
    )


def find_twitter_cdp_port():
    """Find the CDP port of the running twitter-harness Chrome.

    Scans ``ps aux`` for chrome/chromium processes started with
    ``--remote-debugging-port=NNNN`` and queries each port's ``/json`` index.
    Used only as a fallback when TWITTER_CDP_URL isn't exported by the caller.

    Returns:
        The first port (ascending) that lists an x.com / twitter.com tab whose
        URL looks logged in (contains home, chat, notifications or status and
        not login); otherwise the first port with any such tab; otherwise None.
        Never raises: every failure degrades to None.
    """
    try:
        import urllib.request

        ps_out = subprocess.check_output(
            ["ps", "aux"], text=True, stderr=subprocess.DEVNULL
        )
        ports = set()
        for line in ps_out.splitlines():
            lowered = line.lower()
            if "chromium" not in lowered and "chrome" not in lowered:
                continue
            m = re.search(r"remote-debugging-port=(\d+)", line)
            if m:
                ports.add(int(m.group(1)))

        best_port = None
        for port in sorted(ports):
            try:
                # The context manager closes the HTTP response even if reading
                # or decoding fails.
                with urllib.request.urlopen(
                    f"http://localhost:{port}/json", timeout=2
                ) as resp:
                    pages = json.loads(resp.read())
                # Match on the parsed hostname: a plain substring test for
                # "x.com" also matches unrelated hosts such as netflix.com.
                twitter_urls = [
                    url
                    for url in (p.get("url", "") for p in pages)
                    if _is_twitter_url(url)
                ]
                if not twitter_urls:
                    continue
                # Prefer ports with logged-in pages (home, chat, notifications)
                logged_in = any(
                    ("home" in u or "chat" in u or "notifications" in u or "status" in u)
                    and "login" not in u
                    for u in twitter_urls
                )
                if logged_in:
                    return port
                if best_port is None:
                    best_port = port
            except Exception:
                # Port not answering, or an unexpected payload: try the next.
                continue
        return best_port
    except Exception:
        pass
    return None
225
+
226
+
227
# Set when the lock is inherited instead of taken; _release_browser_lock then
# leaves the lock file alone.
_LOCK_INHERITED = False
# Holder identity this process records in the lock file: "python:<pid>".
_LOCK_SESSION_ID = f"python:{os.getpid()}"
# Matches a lower-case, hyphenated UUID (8-4-4-4-12 hex digits), anchored.
_UUID_RE = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$")
230
+
231
+
232
def _release_browser_lock():
    """Delete the lock file when this process is its recorded holder.

    Nothing happens when the lock was inherited from a Claude session (UUID
    holder): the hook/session-end handler releases that one, and removing it
    here would clobber the parent's lock. A missing, unreadable or malformed
    lock file is ignored.
    """
    if _LOCK_INHERITED:
        return
    try:
        if not os.path.exists(LOCK_FILE):
            return
        with open(LOCK_FILE) as handle:
            recorded = json.load(handle)
        # Only the process named in the file may remove it.
        if recorded.get("session_id") != _LOCK_SESSION_ID:
            return
        os.remove(LOCK_FILE)
    except (json.JSONDecodeError, OSError):
        return


# Release the lock on normal interpreter exit.
atexit.register(_release_browser_lock)
251
+
252
+
253
def _is_holder_alive(holder: str) -> bool:
    """Report whether the Claude session named by a UUID lock holder is running.

    A live Claude session carries its UUID on the command line as
    ``claude --session-id <UUID>``, so ``pgrep -f`` finding it means the holder
    is alive; no match means the holder is dead and the lock is stale, even if
    its JSONL transcript is still tail-flushing. These are legacy semantics
    from the retired twitter-agent-lock.sh PreToolUse hook: only python:PID
    holders are written today, so this path is dormant unless a Claude session
    still inherits an in-flight UUID lock.

    Returns:
        False for an empty holder or when pgrep finds no match; True when pgrep
        matches or when the probe itself cannot be run (missing binary,
        timeout, OS error).
    """
    if not holder:
        return False
    probe = ["pgrep", "-f", f"claude.*--session-id {holder}"]
    try:
        outcome = subprocess.run(
            probe,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=2,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        # The probe failed, so err on the side of NOT stealing the lock.
        return True
    return outcome.returncode == 0
278
+
279
+
280
def _is_python_holder_alive(holder: str) -> bool:
    """Liveness probe for a ``python:PID`` lock holder.

    Holders written today are ``python:<pid>``. Before this check existed
    (defect a, 2026-06-16), a holder that died WITHOUT running its atexit
    release (SIGKILL, OOM, watchdog SIGTERM, hard hang) left the lock file
    behind with no way to tell it was dead, so every peer waited the full
    LOCK_WAIT_MAX and the lock only cleared after LOCK_EXPIRY.

    The probe is ``os.kill(pid, 0)``, which delivers no signal and only checks
    that the process exists.

    Args:
        holder: The ``session_id`` recorded in the lock file.

    Returns:
        False only when the holder is ``python:<pid>`` with a positive pid that
        no longer exists. True (treat as held, do NOT steal) for everything
        that cannot be proven dead: non-string or non-python holders, an
        unparseable / non-positive / out-of-range pid, a process owned by
        another user, or any ambiguous OS error. The worst case therefore
        degrades to the LOCK_EXPIRY failsafe, never to stealing a live lock.
    """
    if not isinstance(holder, str) or not holder.startswith("python:"):
        return True  # not a python holder; this probe makes no claim
    try:
        pid = int(holder.split(":", 1)[1])
    except (ValueError, IndexError):
        return True  # unparseable holder -> don't steal on this basis
    if pid <= 0:
        # os.kill() with a non-positive pid addresses process groups, not one
        # process, so its answer says nothing about a lock holder.
        return True
    try:
        os.kill(pid, 0)
    except ProcessLookupError:
        return False  # no such process -> dead, reclaimable
    except PermissionError:
        return True  # exists but owned by someone else -> alive
    except (OSError, OverflowError):
        return True  # ambiguous or out-of-range pid -> err toward NOT stealing
    return True  # process exists -> alive
308
+
309
+
310
def _try_take_lock() -> bool:
    """Atomically claim LOCK_FILE for this process.

    O_CREAT|O_EXCL folds "is it free? then take it" into a single syscall, so
    two cold-start acquirers cannot both win the way a separate exists-check
    followed by an open-for-write allowed (defect c, 2026-06-16).

    Returns:
        True iff this call created the lock file and wrote the holder record
        (session id, integer timestamp, role). False when the file already
        exists or the create fails for any other OS reason; the caller then
        re-loops and re-evaluates the holder.
    """
    exclusive_create = os.O_CREAT | os.O_EXCL | os.O_WRONLY
    try:
        descriptor = os.open(LOCK_FILE, exclusive_create, 0o644)
    except OSError:
        # Covers FileExistsError (a peer beat us to it) and any other failure.
        return False
    try:
        record = {
            "session_id": _LOCK_SESSION_ID,
            "timestamp": int(time.time()),
            "role": LOCK_ROLE,
        }
        os.write(descriptor, json.dumps(record).encode())
    finally:
        os.close(descriptor)
    return True
330
+
331
+
332
def _preempt_holder(pid: int) -> bool:
    """Preempt a live lock holder we outrank (a poster taking over from a scan).

    Sends SIGTERM, waits up to PREEMPT_KILL_WAIT seconds for the process to
    exit, then escalates to SIGKILL once and waits briefly for that to take
    effect. Best-effort; never raises. Once it returns True the caller removes
    the stale lock file and claims it via O_EXCL.

    Args:
        pid: Process id of the holder to preempt.

    Returns:
        True once the holder is gone (or was already gone). False when the pid
        is not a signalable single process (non-positive, or our own pid), when
        we are not allowed to signal it, or when it is still alive after
        SIGKILL.
    """
    # Safety guard: os.kill() with pid 0 or a negative pid signals a whole
    # process group (or every process we may signal), and our own pid would
    # kill the caller. A malformed lock file must never trigger either.
    if not isinstance(pid, int) or pid <= 0 or pid == os.getpid():
        return False
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        return True  # already gone
    except OSError:
        return False  # not ours to signal / ambiguous -> don't claim
    deadline = time.time() + PREEMPT_KILL_WAIT
    while time.time() < deadline:
        try:
            os.kill(pid, 0)
        except OSError:
            return True  # ProcessLookupError or perm change -> dead enough
        time.sleep(0.2)
    # Still alive after the SIGTERM grace window -> escalate once.
    try:
        os.kill(pid, signal.SIGKILL)
    except OSError:
        pass
    # SIGKILL is delivered asynchronously, so a single immediate probe would
    # usually still see the process. Poll for a short, bounded time instead.
    kill_deadline = time.time() + 1.0
    while True:
        try:
            os.kill(pid, 0)
        except OSError:
            return True
        if time.time() >= kill_deadline:
            return False
        time.sleep(0.05)
362
+
363
+
364
def _acquire_browser_lock():
    """Acquire the Twitter browser session mutex (~/.claude/twitter-browser-lock.json).

    This file-mutex is the UNIVERSAL serializer for every twitter_browser.py
    browser op (all of them route through get_browser_and_page below). The shell
    FIFO lock in skill/lock.sh only serializes the pipelines that bother to take
    it; this one catches everything, including cross-pipeline handoff races and
    MCP-driven posts.

    Holders today are python:PID. UUID-style holders are a legacy artifact of the
    retired PreToolUse hook (twitter-agent-lock.sh); a live UUID holder is a
    parent Claude session still in flight, so we INHERIT rather than fight it.

    Reclaim priority (a holder we can PROVE is dead is taken immediately, so a
    crashed peer can never starve the fleet for LOCK_WAIT_MAX/LOCK_EXPIRY):
      1. holder == us             -> re-entrant; we already hold it.
      2. UUID holder, pid gone    -> stale legacy lock, reclaim.
      3. python:PID, pid gone     -> dead peer (defect a fix), reclaim.
      4. age >= LOCK_EXPIRY       -> failsafe for holders we cannot probe.
      5. live UUID holder         -> inherit (parent session).
      6. live python:PID holder   -> real peer; wait, then give up after
                                     LOCK_WAIT_MAX with a structured error.

    Acquisition itself is atomic (_try_take_lock / O_EXCL), so the moment we
    decide the lock is free, no concurrent acquirer can also claim it.

    Every retry path is bounded by the same LOCK_WAIT_MAX deadline: on expiry
    the function prints a ``{"success": false, "error": ...}`` JSON line to
    stdout and calls ``sys.exit(1)``. It returns None once the lock is held or
    inherited, updating the module globals _LOCK_SESSION_ID / _LOCK_INHERITED
    when it inherits.

    NOTE for future maintainers: do NOT "simplify" this by having the shell
    pipelines `rm -f` the lockfile around release_lock. That blind rm deleted
    LIVE peers' locks (defect b) and was removed 2026-06-16. Dead holders are
    reclaimed here instead. See docs/twitter_browser_lock.md.
    """
    global _LOCK_SESSION_ID, _LOCK_INHERITED
    deadline = time.time() + LOCK_WAIT_MAX

    def _give_up_if_past_deadline(error):
        # Single exit point so no retry path can forget the deadline.
        if time.time() >= deadline:
            print(json.dumps({"success": False, "error": error}))
            sys.exit(1)

    # Guarantee the lock dir exists so _try_take_lock's O_EXCL create can't fail
    # for a missing-parent reason (which would otherwise spin the no-file path).
    try:
        os.makedirs(os.path.dirname(LOCK_FILE), exist_ok=True)
    except OSError:
        pass
    while True:
        if not os.path.exists(LOCK_FILE):
            if _try_take_lock():
                break
            # Lost the create race to a peer (or a persistent create failure).
            # Bound by `deadline` so this path can never spin forever.
            _give_up_if_past_deadline(
                f"Twitter browser lock contended on create; waited {LOCK_WAIT_MAX}s, giving up."
            )
            time.sleep(LOCK_POLL_INTERVAL)
            continue
        try:
            with open(LOCK_FILE) as f:
                lock = json.load(f)
            # Valid JSON that is not an object (e.g. `[]`, `null`, `3`) is just
            # as unusable as a half-written file; route it to the same path
            # instead of crashing on lock.get() below.
            if not isinstance(lock, dict):
                raise ValueError("lock payload is not a JSON object")
        except (ValueError, OSError):
            # Corrupt / half-written / vanished between exists() and open().
            # (json.JSONDecodeError and UnicodeDecodeError are ValueErrors.)
            # Try to claim atomically; if a peer holds a valid lock our O_EXCL
            # create fails and we re-loop. Bounded by `deadline` so a persistently
            # unreadable lockfile gives up instead of hanging the pipeline.
            if _try_take_lock():
                break
            _give_up_if_past_deadline(
                f"Twitter browser lock unreadable; waited {LOCK_WAIT_MAX}s, giving up."
            )
            time.sleep(LOCK_POLL_INTERVAL)
            continue
        try:
            age = time.time() - lock.get("timestamp", 0)
            int(age)  # NaN/inf timestamps would break the int(age) formatting below
        except (TypeError, ValueError, OverflowError):
            # Unusable timestamp -> treat it like a missing one (epoch 0), which
            # lands in the expiry failsafe rather than crashing.
            age = time.time()
        holder = lock.get("session_id", "")
        if not isinstance(holder, str):
            holder = ""  # non-string holder can't be probed; fall to the age failsafe
        holder_role = lock.get("role", "scan")  # legacy locks (no role) = preemptable

        # Shared by the case-5b and case-6 give-ups. The "locked by session"
        # substring is preserved for downstream parsers.
        peer_alive_error = (
            f"Twitter browser locked by session {holder} ({int(age)}s, peer alive); "
            f"waited {LOCK_WAIT_MAX}s, giving up."
        )

        # 1. Re-entrant: the lock is already ours (same process, or a stale lock
        # left by a previous process whose PID we have since reused). Refresh the
        # timestamp so a peer's LOCK_EXPIRY failsafe can't reclaim it under us.
        if holder == _LOCK_SESSION_ID and not _LOCK_INHERITED:
            _refresh_browser_lock()
            break

        # 1b. Batch-owner inherit (posting). The poster (twitter_post_plan.py)
        # acquires this lock ONCE and holds it across the WHOLE approved batch,
        # exporting its own session id as S4L_LOCK_OWNER for the child
        # twitter_browser.py reply subprocesses it spawns. Each child INHERITS the
        # parent's hold instead of contending for it -- two role:"post" peers would
        # otherwise both fall to the case-6 peer-wait and give up after
        # LOCK_WAIT_MAX, breaking the post. The child refreshes the timestamp
        # (proof of progress at this candidate boundary, so the POST_LOCK_EXPIRY
        # failsafe only ever fires on a real hang) and, being _LOCK_INHERITED,
        # leaves the lock in place for the PARENT to release at batch end. A DEAD
        # owner is never inherited: the alive-probe fails here and we fall through
        # to the dead_python reclaim below, so a crashed batch can't wedge the
        # browser. This is what closes the inter-candidate gap (the link_tail
        # claude -p call, ~5-20s) the every-60s autopilot scan used to slip into.
        _batch_owner = os.environ.get("S4L_LOCK_OWNER") or ""
        if holder and holder == _batch_owner and _is_python_holder_alive(holder):
            _LOCK_SESSION_ID = holder
            _LOCK_INHERITED = True
            _refresh_browser_lock()
            print(f"[browser_lock] inherited batch owner={holder} "
                  f"role={holder_role} -> pid={os.getpid()}", file=sys.stderr)
            break

        # 2-4. Reclaim a holder we can prove is dead/expired. Remove-then-take so
        # the O_EXCL claim wins; if a peer reclaims at the same instant exactly
        # one of us creates the file and the other re-loops (never both).
        reclaim_reason = ""
        if _UUID_RE.match(holder or "") and not _is_holder_alive(holder):
            reclaim_reason = "dead_uuid"
        elif holder.startswith("python:") and not _is_python_holder_alive(holder):
            reclaim_reason = "dead_python"
        elif age >= (POST_LOCK_EXPIRY if holder_role == "post" else LOCK_EXPIRY):
            # Role-aware failsafe: a hung poster self-clears on the posting-only
            # POST_LOCK_EXPIRY, a scan on the fleet-wide LOCK_EXPIRY. Scan
            # behaviour is unchanged; only the post ceiling is decoupled.
            reclaim_reason = "expired"
        if reclaim_reason:
            try:
                os.remove(LOCK_FILE)
            except OSError:
                pass
            if _try_take_lock():
                # Verifiable signal that defect-a starvation was prevented.
                print(f"[browser_lock] reclaimed holder={holder or '<none>'} "
                      f"reason={reclaim_reason} age={int(age)}s -> pid={os.getpid()}",
                      file=sys.stderr)
                break
            # The stale file could not be replaced (e.g. it is not removable by
            # us). Without this bound the loop would retry the reclaim forever.
            _give_up_if_past_deadline(
                f"Twitter browser locked by session {holder or '<none>'} "
                f"({int(age)}s, {reclaim_reason}); could not reclaim stale lock, "
                f"waited {LOCK_WAIT_MAX}s, giving up."
            )
            time.sleep(LOCK_POLL_INTERVAL)
            continue

        # 5. Live UUID holder = parent Claude session still in flight -> inherit.
        if _UUID_RE.match(holder or ""):
            _LOCK_SESSION_ID = holder
            _LOCK_INHERITED = True
            break

        # 5b. POSTING PRIORITY (cross-process). A LIVE python:PID peer running a
        # lower-priority op (role != "post": the scan/draft cycle, whether the
        # plugin's own, a separate autopilot agent's, or the launchd cron's) must
        # YIELD to an approved post. Preempt it by signal and reclaim, so the post
        # takes the browser at once instead of waiting LOCK_WAIT_MAX and giving up
        # while the scan holds it. The aborted scan just re-runs next cron tick;
        # posting is the scarce, user-initiated action. Only a poster
        # (LOCK_ROLE == "post") ever preempts, and only a non-post holder -- two
        # posters fall through to the normal peer-wait below so neither kills the
        # other. UUID holders are handled above (we inherit, never kill those).
        if (
            LOCK_ROLE == "post"
            and holder.startswith("python:")
            and holder_role != "post"
            and _is_python_holder_alive(holder)
        ):
            try:
                victim_pid = int(holder.split(":", 1)[1])
            except (ValueError, IndexError):
                victim_pid = 0
            if victim_pid and _preempt_holder(victim_pid):
                try:
                    os.remove(LOCK_FILE)
                except OSError:
                    pass
                if _try_take_lock():
                    print(
                        f"[browser_lock] post preempted holder={holder} "
                        f"role={holder_role} age={int(age)}s -> pid={os.getpid()}",
                        file=sys.stderr,
                    )
                    break
            # Preempt didn't land (couldn't kill, or a peer reclaimed first) ->
            # re-loop and re-evaluate rather than busy-spin. Bounded by
            # `deadline`: a holder we can never signal must not hang the poster.
            _give_up_if_past_deadline(peer_alive_error)
            time.sleep(LOCK_POLL_INTERVAL)
            continue

        # 6. Live python:PID peer. Wait, then give up. Reaching the deadline now
        # means the holder is a genuinely LIVE peer (dead ones were reclaimed
        # above), i.e. real contention -- NOT the defect-a starvation.
        _give_up_if_past_deadline(peer_alive_error)
        time.sleep(LOCK_POLL_INTERVAL)
        continue
551
+
552
+
553
def _refresh_browser_lock():
    """Rewrite the lockfile with a fresh timestamp so it does not expire mid-operation.

    The payload carries the current _LOCK_SESSION_ID, the current epoch second
    and LOCK_ROLE. Write failures are ignored (best-effort).
    """
    try:
        with open(LOCK_FILE, "w") as lock_handle:
            stamped = {
                "session_id": _LOCK_SESSION_ID,
                "timestamp": int(time.time()),
                "role": LOCK_ROLE,
            }
            lock_handle.write(json.dumps(stamped))
    except OSError:
        pass
560
+
561
+
562
def _pick_harness_page(browser):
    """Choose the page to drive on an already-attached harness browser.

    Preference order: an existing non-login x.com/twitter.com tab, then the
    first open page of the first context, then a new page in that context.
    When the browser exposes no context at all, a new context and page are
    created.
    """
    contexts = browser.contexts
    if not contexts:
        # No contexts present (unusual on a fresh harness Chrome) -- create one.
        return browser.new_context().new_page()
    context = contexts[0]
    pages = context.pages
    # Prefer a reusable Twitter tab if one exists.
    for pg in pages:
        if ("x.com" in pg.url or "twitter.com" in pg.url) and "login" not in pg.url:
            return pg
    # Otherwise reuse the first page (caller will navigate it).
    if pages:
        return pages[0]
    return context.new_page()


def get_browser_and_page(playwright):
    """Connect to the running twitter-harness Chrome via CDP.

    Returns (browser, page, is_cdp=True). `page` is a reused existing Twitter
    tab when one is open; otherwise a freshly created page on the same
    browser-harness context. Caller should navigate it, not close it.

    Connection order:
      1. TWITTER_CDP_URL env (set by lib/twitter-backend.sh) — direct attach.
      2. find_twitter_cdp_port() — ps-based discovery of any Chrome serving
         x.com/twitter.com (fallback when env not exported by the caller).

    Both paths target the browser-harness Chrome since the legacy twitter-agent
    profile + MCP wrapper were retired on 2026-05-19. There is no
    launch_persistent_context fallback: if neither CDP attach succeeds the
    caller (skill/lib/twitter-backend.sh:ensure_twitter_browser_for_backend)
    is responsible for booting the harness Chrome first.

    The browser lock is acquired first. On every failure path the lock is
    released, a ``{"success": false, "error": ...}`` JSON line is printed to
    stdout and the process exits with status 1.

    Both attach paths share one page-selection routine, so a browser that
    attaches but exposes no contexts gets a new context on either path rather
    than being misreported as "not reachable".
    """
    _acquire_browser_lock()

    cdp_url_override = os.environ.get("TWITTER_CDP_URL", "").strip()
    if cdp_url_override:
        try:
            browser = playwright.chromium.connect_over_cdp(cdp_url_override)
            return browser, _pick_harness_page(browser), True
        except Exception as e:
            _release_browser_lock()
            print(json.dumps({
                "success": False,
                "error": f"TWITTER_CDP_URL connect failed ({cdp_url_override}): {e}"
            }))
            sys.exit(1)

    cdp_port = find_twitter_cdp_port()

    if cdp_port:
        try:
            browser = playwright.chromium.connect_over_cdp(
                f"http://localhost:{cdp_port}"
            )
            return browser, _pick_harness_page(browser), True
        except Exception as e:
            _release_browser_lock()
            print(json.dumps({
                "success": False,
                "error": f"harness CDP attach failed (port {cdp_port}): {e}"
            }))
            sys.exit(1)

    _release_browser_lock()
    print(json.dumps({
        "success": False,
        "error": (
            "No twitter-harness Chrome reachable. Set TWITTER_CDP_URL or boot "
            "harness Chrome via skill/lib/twitter-backend.sh:ensure_twitter_"
            "browser_for_backend before invoking twitter_browser.py."
        )
    }))
    sys.exit(1)
642
+
643
+
644
+ def _handle_dm_passcode(page):
645
+ """Handle the DM encryption passcode dialog if it appears.
646
+
647
+ Twitter/X requires a 4-digit passcode to decrypt DMs.
648
+ Returns True if passcode was entered, False if not needed.
649
+ """
650
+ if "pin/recovery" not in page.url:
651
+ return False
652
+
653
+ if not DM_PASSCODE:
654
+ print("Warning: DM passcode required but TWITTER_DM_PASSCODE not set", file=sys.stderr)
655
+ return False
656
+
657
+ try:
658
+ digits = list(DM_PASSCODE)
659
+ # Find the 4 passcode input boxes
660
+ inputs = page.locator('input')
661
+ count = inputs.count()
662
+ for i in range(min(len(digits), count)):
663
+ inp = inputs.nth(i)
664
+ inp.click()
665
+ page.keyboard.type(digits[i])
666
+ page.wait_for_timeout(300)
667
+
668
+ page.wait_for_timeout(3000)
669
+ return "pin/recovery" not in page.url
670
+ except Exception as e:
671
+ print(f"Warning: Failed to enter DM passcode: {e}", file=sys.stderr)
672
+ return False
673
+
674
+
675
+
676
+ def _install_rate_limit_listener(page):
677
+ """Count 429 responses on x.com DM API endpoints.
678
+
679
+ X throttles the account (not per-tab) after too many /i/chat navigations
680
+ and GetInboxPageRequestQuery hits in a window. Returns a mutable counter
681
+ dict; caller reads counter['429'] after the page settles.
682
+ """
683
+ counter = {"429": 0, "first_429_url": None}
684
+
685
+ def on_response(resp):
686
+ try:
687
+ if resp.status != 429:
688
+ return
689
+ url = resp.url
690
+ if "api.x.com" not in url and "x.com/i/api" not in url:
691
+ return
692
+ counter["429"] += 1
693
+ if counter["first_429_url"] is None:
694
+ counter["first_429_url"] = url
695
+ except Exception:
696
+ pass
697
+
698
+ page.on("response", on_response)
699
+ return counter
700
+
701
+
702
+ def _is_x_unreachable(page):
703
+ """Return (True, reason) if Chrome rendered its own error page for x.com.
704
+
705
+ Happens when x.com drops the TCP connection after sustained 429s; Chrome
706
+ shows `chrome-error://chromewebdata/` with "This site can't be reached".
707
+ Distinct from "normal" x.com errors (which still render a valid x.com DOM).
708
+ """
709
+ try:
710
+ url = page.url or ""
711
+ if url.startswith("chrome-error:"):
712
+ return True, f"chrome_error_url:{url}"
713
+ body_text = page.evaluate("() => document.body ? document.body.innerText : ''") or ""
714
+ if "ERR_FAILED" in body_text and "site can" in body_text.lower():
715
+ return True, "err_failed_body"
716
+ except Exception:
717
+ pass
718
+ return False, None
719
+
720
+
721
+ def _rate_limit_response(reason, counter=None, url=None):
722
+ """Build the JSON payload we return when X has blocked us.
723
+
724
+ Also prints a loud stderr marker so grep finds it in launchd logs.
725
+ """
726
+ payload = {
727
+ "ok": False,
728
+ "error": "rate_limited",
729
+ "reason": reason,
730
+ "rate_limit_count": counter["429"] if counter else 0,
731
+ "url": url,
732
+ "conversations": [],
733
+ }
734
+ print(
735
+ f"RATE_LIMITED_TWITTER: reason={reason} "
736
+ f"429s={payload['rate_limit_count']} url={url}",
737
+ file=sys.stderr,
738
+ )
739
+ return payload
740
+
741
+
742
def _collect_our_reply_links(page):
    """Return the set of ``/<our_handle>/status/<digits>`` hrefs present in the DOM.

    The handle comes from our_handle(). Only anchors whose href ends in the
    numeric status id are kept.
    """
    handle = our_handle()
    dom_query = f"""() => {{
        const links = new Set();
        document.querySelectorAll('a[href*="/{handle}/status/"]').forEach(a => {{
            const href = a.getAttribute('href');
            if (href && /\\/{handle}\\/status\\/\\d+$/.test(href))
                links.add(href);
        }});
        return [...links];
    }}"""
    found_hrefs = page.evaluate(dom_query)
    return set(found_hrefs)
754
+
755
+
756
+ def _wait_for_reply_textbox(page, total_timeout_ms=45000):
757
+ """Wait for the reply composer textbox to mount. Returns a locator or None.
758
+
759
+ Polls multiple selectors because the React composer sometimes attaches late
760
+ on slow egress (E2B sandbox) and the aria-label has historically varied
761
+ ("Post text" / "Tweet your reply" / "Post your reply"). The data-testid
762
+ `tweetTextarea_0` has been stable for years and is the primary signal.
763
+ """
764
+ import time as _t
765
+ selectors = (
766
+ '[data-testid="tweetTextarea_0"]',
767
+ '[role="textbox"][aria-label="Post text"]',
768
+ '[role="textbox"][aria-label="Tweet your reply"]',
769
+ '[role="textbox"][aria-label="Post your reply"]',
770
+ )
771
+ deadline = _t.monotonic() + (total_timeout_ms / 1000.0)
772
+ while _t.monotonic() < deadline:
773
+ for sel in selectors:
774
+ try:
775
+ loc = page.locator(sel).first
776
+ if loc.count() > 0 and loc.is_visible():
777
+ return loc
778
+ except Exception:
779
+ pass
780
+ page.wait_for_timeout(500)
781
+ return None
782
+
783
+
784
# Post-action interstitials X shows AFTER a successful reply (e.g. the
# "Unlock more on X" graduated-access sheet). They don't block the post that
# triggered them, but the sheet stays up on screen and would overlay the
# composer on the NEXT reply in a batch -> spurious reply_box_not_found for
# posts 2..N. We dismiss them deterministically right after each successful
# post (not before the next reply), so the sheet never lingers. Targeted by the
# sheet's CTA label so we never touch a real compose/confirm dialog (those have
# no "Got it"); best-effort, fast, never raises.
#
# These are button names; _dismiss_known_overlays tries them in this order and
# matches each one exactly.
_OVERLAY_DISMISS_LABELS = ("Got it", "Dismiss")
793
+
794
+
795
def _dismiss_known_overlays(page) -> bool:
    """Click away the first known X nudge sheet found covering the page.

    Each label in _OVERLAY_DISMISS_LABELS is looked up as an exactly-named
    button; the first one that exists and is visible is clicked.

    Returns True if a button was clicked, False otherwise. All errors are
    swallowed, so this is a no-op when no known overlay is present."""
    for cta in _OVERLAY_DISMISS_LABELS:
        try:
            candidate = page.get_by_role("button", name=cta, exact=True).first
            showing = candidate.count() > 0 and candidate.is_visible()
            if not showing:
                continue
            candidate.click(timeout=2000)
            page.wait_for_timeout(800)
            print(f"[overlay] dismissed known interstitial via '{cta}' button",
                  file=sys.stderr)
            return True
        except Exception:
            continue
    return False
812
+
813
+
814
+ def _dump_reply_failure_diag(page, tweet_url):
815
+ """Dump screenshot + DOM state on reply_box_not_found. Returns a diag dict."""
816
+ import time as _t
817
+ ts = int(_t.time())
818
+ diag = {"ts": ts, "tweet_url": tweet_url}
819
+ try:
820
+ diag["final_url"] = page.url
821
+ except Exception as _e:
822
+ diag["final_url_err"] = str(_e)
823
+ try:
824
+ png_path = f"/tmp/twitter_reply_failure_{ts}.png"
825
+ page.screenshot(path=png_path, full_page=False)
826
+ diag["screenshot"] = png_path
827
+ except Exception as _e:
828
+ diag["screenshot_err"] = str(_e)
829
+ try:
830
+ diag["dom"] = page.evaluate("""() => {
831
+ const tbs = Array.from(document.querySelectorAll('[role="textbox"]'));
832
+ const body = (document.body && document.body.innerText || '');
833
+ const tweetRendered = !!document.querySelector('article[data-testid="tweet"]');
834
+ // Reply-audience restriction: X renders one of these phrasings when the
835
+ // author limits who can reply. "Only some accounts can reply" is the
836
+ // confirmed live string; the others cover the documented variants.
837
+ const RESTRICT = /Only some accounts can reply|People who follow .{0,40} can reply|Accounts .{0,40} (follows?|mentioned) can reply|People .{0,40} mentioned can reply|Verified accounts can reply|Subscribers can reply|You can.?t reply to this/i;
838
+ const m = body.match(RESTRICT);
839
+ // The audience control aria-label ("Everyone can reply" vs a restricted label).
840
+ const audLabel = (Array.from(document.querySelectorAll('[aria-label]'))
841
+ .map(e => e.getAttribute('aria-label') || '')
842
+ .find(s => /can reply$/i.test(s)) || '');
843
+ const restrictedByAud = !!audLabel && !/everyone can reply/i.test(audLabel);
844
+ return {
845
+ title: (document.title || '').slice(0, 120),
846
+ textbox_count: tbs.length,
847
+ textbox_labels: tbs.map(t => t.getAttribute('aria-label')),
848
+ has_tweetTextarea_0: !!document.querySelector('[data-testid="tweetTextarea_0"]'),
849
+ has_login_modal: !!document.querySelector('[data-testid="loginButton"]'),
850
+ has_age_gate: !!document.querySelector('[data-testid="sensitive-media-button"]'),
851
+ tweet_rendered: tweetRendered,
852
+ reply_restricted: !!(m || restrictedByAud),
853
+ restriction_label: (m ? m[0] : (restrictedByAud ? audLabel : '')).slice(0, 80),
854
+ page_text_snippet: body.slice(0, 300),
855
+ };
856
+ }""")
857
+ except Exception as _e:
858
+ diag["dom_err"] = str(_e)
859
+ return diag
860
+
861
+
862
+ def _like_first_tweet_on_page(page):
863
+ """Like the primary (first) tweet currently rendered on the page.
864
+
865
+ Operates on an already-open page positioned on a tweet permalink (the
866
+ parent tweet is the first ``article[data-testid="tweet"]``). Used both by
867
+ the standalone ``like`` command and inline by ``reply_to_tweet()`` right
868
+ after a reply lands (the page is still on the thread).
869
+
870
+ Strictly scoped to the FIRST article so we like the parent tweet, never a
871
+ reply below it. Idempotent: if the tweet is already liked (button testid
872
+ has flipped ``like`` -> ``unlike``) we report already_liked without
873
+ clicking. Returns one of:
874
+ {"ok": True, "liked": True, "already_liked": False}
875
+ {"ok": True, "liked": False, "already_liked": True}
876
+ {"ok": False, "error": "..."}
877
+ """
878
+ try:
879
+ first_article = page.locator('article[data-testid="tweet"]').first
880
+ first_article.wait_for(state="visible", timeout=15000)
881
+
882
+ # Already liked? The action-bar button testid flips like -> unlike.
883
+ if first_article.locator('[data-testid="unlike"]').count() > 0:
884
+ print("[like] parent tweet already liked; nothing to do", file=sys.stderr)
885
+ return {"ok": True, "liked": False, "already_liked": True}
886
+
887
+ like_btn = first_article.locator('[data-testid="like"]')
888
+ if like_btn.count() == 0:
889
+ print("[like] no like button found on parent tweet", file=sys.stderr)
890
+ return {"ok": False, "error": "like_button_not_found"}
891
+
892
+ like_btn.first.click()
893
+ page.wait_for_timeout(1500)
894
+
895
+ # Verify the click registered: testid should now be 'unlike'.
896
+ if first_article.locator('[data-testid="unlike"]').count() > 0:
897
+ print("[like] parent tweet liked OK", file=sys.stderr)
898
+ return {"ok": True, "liked": True, "already_liked": False}
899
+ print("[like] clicked like but unlike state not confirmed", file=sys.stderr)
900
+ return {"ok": False, "liked": False, "error": "like_unconfirmed"}
901
+ except Exception as e:
902
+ print(f"[like] parent tweet not liked (non-fatal): {str(e).splitlines()[0]}", file=sys.stderr)
903
+ return {"ok": False, "error": str(e).splitlines()[0]}
904
+
905
+
906
def like_tweet(tweet_url):
    """Standalone: navigate to a tweet and like it (CLI: ``like <tweet_url>``).

    Connects to the running twitter-harness Chrome via CDP (the same logged-in
    session the reply path uses) so the like comes from our account. Returns
    the dict from ``_like_first_tweet_on_page`` with ``tweet_url`` attached, or
    ``{"ok": False, "error": "tweet_not_rendered", "tweet_url": ...}`` when no
    tweet article attaches after navigation.
    """
    print(f"[twitter_browser] like_tweet called: {tweet_url}", file=sys.stderr)
    # Imported lazily so the module can be loaded without playwright present.
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)
        try:
            # Try a full "load" first; fall back to "domcontentloaded". A
            # failure of both is tolerated because the selector wait below is
            # the real readiness gate.
            try:
                page.goto(tweet_url, wait_until="load", timeout=60000)
            except Exception:
                try:
                    page.goto(tweet_url, wait_until="domcontentloaded", timeout=60000)
                except Exception:
                    pass
            # Fixed settle time before probing for the tweet article.
            page.wait_for_timeout(4000)
            try:
                page.wait_for_selector(
                    'article[data-testid="tweet"]', state="attached", timeout=20000
                )
            except Exception:
                return {"ok": False, "error": "tweet_not_rendered", "tweet_url": tweet_url}
            result = _like_first_tweet_on_page(page)
            result["tweet_url"] = tweet_url
            return result
        finally:
            # get_browser_and_page documents that the caller should not close
            # the returned page; it is only closed for a non-CDP session.
            if not is_cdp:
                page.close()
            browser.close()
940
+
941
+
942
+ def reply_to_tweet(tweet_url, text, apply_campaigns=True):
943
+ """Reply to a tweet.
944
+
945
+ Navigates to the tweet, clicks the reply box, types the reply, and submits.
946
+
947
+ Active Twitter campaigns with a `suffix` are applied at this tool layer:
948
+ the suffix is appended to `text` (per `sample_rate` coin flip per campaign)
949
+ before typing, so the literal text is guaranteed to land. Caller opts out
950
+ via `apply_campaigns=False` (used by the self-reply path so the project URL
951
+ follow-up doesn't carry the campaign tag).
952
+
953
+ Returns: {"ok": true, "tweet_url": "...", "reply_url": "...",
954
+ "applied_campaigns": [...], "final_text": "..."}
955
+ or {"ok": false, "error": "..."}
956
+ """
957
+ print(f"[twitter_browser] reply_to_tweet called: {tweet_url}", file=sys.stderr)
958
+
959
+ # Identity gate: refuse to post when no account is configured. Without a
960
+ # resolved handle we cannot attribute the post or build a correct reply
961
+ # permalink, and the old behaviour silently impersonated the repo owner
962
+ # (handle "m13v_"). Fail fast and loud so the misconfiguration surfaces
963
+ # instead of polluting the shared DB under someone else's identity.
964
+ _handle = our_handle()
965
+ if not _handle:
966
+ print("[twitter_browser] no twitter account configured "
967
+ "(set AUTOPOSTER_TWITTER_HANDLE or accounts.twitter.handle in "
968
+ "config.json); refusing to post.", file=sys.stderr)
969
+ return {"ok": False, "error": "no_account_configured"}
970
+
971
+ applied_campaigns = []
972
+ if apply_campaigns:
973
+ for cid, suffix, sample_rate in _load_active_twitter_campaigns():
974
+ if random.random() < sample_rate:
975
+ # Wrap any URLs in the suffix through dm_short_links so clicks
976
+ # attribute. The suffix carries no project_name, so we detect
977
+ # the project from the URL hostname against config.json before
978
+ # minting. Falls back to raw suffix if no project matches (e.g.
979
+ # plain-text suffix like " written with ai", or third-party URL).
980
+ wrapped_suffix = suffix
981
+ if 'http' in suffix:
982
+ try:
983
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
984
+ from dm_short_links import wrap_text_for_post, _classify_url, _load_projects, _URL_RE
985
+ projects = _load_projects()
986
+ # Detect project_name from the first URL in the suffix.
987
+ m = _URL_RE.search(suffix)
988
+ detected_project = None
989
+ if m:
990
+ _, detected_project = _classify_url(m.group(0), projects)
991
+ if detected_project:
992
+ wrap_res = wrap_text_for_post(text=suffix, platform='twitter',
993
+ project_name=detected_project)
994
+ # Use the wrapped text whenever the wrap call succeeded.
995
+ # codes=[] is now valid (UTM-only fallback path for
996
+ # projects with short_links_live=false), and the
997
+ # rewritten text still carries full s4l attribution.
998
+ # Old guard `and wrap_res.get('codes')` silently
999
+ # skipped utm_only fallbacks and let bare URLs
1000
+ # through in the suffix.
1001
+ if wrap_res.get('ok'):
1002
+ wrapped_suffix = wrap_res['text']
1003
+ tag = 'codes' if wrap_res.get('codes') else 'utm_only'
1004
+ print(f"[reply_to_tweet] suffix wrap project={detected_project} "
1005
+ f"{tag}={wrap_res.get('codes') or [s.get('reason') for s in wrap_res.get('skipped',[])]}",
1006
+ file=sys.stderr)
1007
+ except Exception as _e:
1008
+ print(f"[reply_to_tweet] suffix wrap failed ({_e}); raw",
1009
+ file=sys.stderr)
1010
+ text = text + wrapped_suffix
1011
+ applied_campaigns.append(cid)
1012
+ print(f"[reply_to_tweet] applied_campaigns={applied_campaigns} text_len={len(text)}",
1013
+ file=sys.stderr)
1014
+
1015
+ from playwright.sync_api import sync_playwright
1016
+
1017
+ with sync_playwright() as p:
1018
+ browser, page, is_cdp = get_browser_and_page(p)
1019
+
1020
+ try:
1021
+ # Set up Network interception to capture CreateTweet response.
1022
+ # Two parallel paths for redundancy:
1023
+ # (a) page.on("response") — Playwright's event-loop hook.
1024
+ # (b) CDP Network.responseReceived — slightly faster + less
1025
+ # body-fetch overhead, Chromium-only.
1026
+ # Both write into _created_tweet_ids; dedup-on-append keeps the
1027
+ # list a set of unique rest_ids regardless of which path fired.
1028
+ _cdp_session = None
1029
+ _created_tweet_ids = []
1030
+
1031
+ def _on_response_event(resp):
1032
+ # Engine-agnostic CreateTweet capture. Filter by URL FIRST so
1033
+ # we don't pay a body-fetch round-trip on every graphql call.
1034
+ try:
1035
+ if "CreateTweet" not in resp.url:
1036
+ return
1037
+ if resp.status != 200:
1038
+ return
1039
+ data = resp.json()
1040
+ rest_id = (
1041
+ data.get("data", {})
1042
+ .get("create_tweet", {})
1043
+ .get("tweet_results", {})
1044
+ .get("result", {})
1045
+ .get("rest_id")
1046
+ )
1047
+ if rest_id and rest_id not in _created_tweet_ids:
1048
+ _created_tweet_ids.append(rest_id)
1049
+ except Exception:
1050
+ pass
1051
+
1052
+ page.on("response", _on_response_event)
1053
+
1054
+ try:
1055
+ _cdp_session = page.context.new_cdp_session(page)
1056
+ _cdp_session.send("Network.enable")
1057
+
1058
+ def _on_cdp_response(params):
1059
+ try:
1060
+ url = params.get("response", {}).get("url", "")
1061
+ if "CreateTweet" in url:
1062
+ body_resp = _cdp_session.send(
1063
+ "Network.getResponseBody",
1064
+ {"requestId": params["requestId"]},
1065
+ )
1066
+ data = json.loads(body_resp.get("body", "{}"))
1067
+ rest_id = (
1068
+ data.get("data", {})
1069
+ .get("create_tweet", {})
1070
+ .get("tweet_results", {})
1071
+ .get("result", {})
1072
+ .get("rest_id")
1073
+ )
1074
+ if rest_id and rest_id not in _created_tweet_ids:
1075
+ _created_tweet_ids.append(rest_id)
1076
+ except Exception:
1077
+ pass
1078
+
1079
+ _cdp_session.on("Network.responseReceived", _on_cdp_response)
1080
+ except Exception:
1081
+ pass
1082
+
1083
+ # Navigate + locate reply box. Composer mount is flaky on E2B
1084
+ # sandbox egress (~1-in-5 misses on first attempt). Strategy:
1085
+ # up to 2 navigation attempts; on miss, scroll-nudge once before
1086
+ # re-navigating. On final miss, dump diagnostics for triage.
1087
+ reply_box = None
1088
+ tweet_not_found = False
1089
+ for nav_attempt in (1, 2):
1090
+ try:
1091
+ page.goto(tweet_url, wait_until="load", timeout=60000)
1092
+ except Exception:
1093
+ try:
1094
+ page.goto(tweet_url, wait_until="domcontentloaded", timeout=60000)
1095
+ except Exception:
1096
+ pass
1097
+ # Was a blind 15s/8s settle here -> pure dead latency. SPA
1098
+ # readiness is ALREADY gated actively below by
1099
+ # wait_for_selector("main") (up to 20s) and
1100
+ # _wait_for_reply_textbox (polls every 500ms up to 45s); both
1101
+ # return the instant the composer mounts, so the blind sleep
1102
+ # only delayed the start of that polling. Keep a short floor so
1103
+ # the initial JS kicks off (and the deleted-tweet text check
1104
+ # below has content to read), then let the active gates do the
1105
+ # real waiting. Cuts ~12s off every happy-path reply.
1106
+ # (optimized 2026-06-22: 15000/8000 -> 2500)
1107
+ page.wait_for_timeout(2500)
1108
+
1109
+ # `wait_until="load"` fires before Twitter's SPA mounts the
1110
+ # <main> app shell, so "loaded" != "rendered". Explicitly gate
1111
+ # on <main> attaching. If it never mounts (rate-limit
1112
+ # interstitial, error page, logged-out shell, or a stalled SPA)
1113
+ # DO NOT let text_content("main") raise a bare TimeoutError that
1114
+ # crashes the whole script with no_reply_json and no diagnostics.
1115
+ # Swallow it, log the actual URL (rate-limit vs logout triage),
1116
+ # and fall through to the nudge + re-nav; on the final miss the
1117
+ # reply_box-None path reaches _dump_reply_failure_diag below.
1118
+ try:
1119
+ page.wait_for_selector("main", state="attached", timeout=20000)
1120
+ page_text = page.text_content("main", timeout=5000) or ""
1121
+ except Exception:
1122
+ page_text = ""
1123
+ try:
1124
+ cur_url = page.url
1125
+ except Exception:
1126
+ cur_url = "<unknown>"
1127
+ print(f"[reply_to_tweet] <main> not rendered on "
1128
+ f"nav_attempt={nav_attempt} (url={cur_url!r}); "
1129
+ f"nudging + re-navigating", file=sys.stderr)
1130
+ if "this page doesn't exist" in page_text.lower():
1131
+ tweet_not_found = True
1132
+ break
1133
+
1134
+ reply_box = _wait_for_reply_textbox(page, total_timeout_ms=45000)
1135
+ if reply_box:
1136
+ break
1137
+
1138
+ # Nudge: small scroll + scroll back; sometimes coaxes the
1139
+ # composer to attach when React stalled on the initial mount.
1140
+ print(f"[reply_to_tweet] reply_box missing on nav_attempt={nav_attempt}; "
1141
+ f"nudging + re-navigating", file=sys.stderr)
1142
+ try:
1143
+ page.evaluate("window.scrollBy(0, 400)")
1144
+ page.wait_for_timeout(1500)
1145
+ page.evaluate("window.scrollTo(0, 0)")
1146
+ page.wait_for_timeout(1500)
1147
+ except Exception:
1148
+ pass
1149
+
1150
+ if tweet_not_found:
1151
+ return {"ok": False, "error": "tweet_not_found"}
1152
+
1153
+ if not reply_box:
1154
+ diag = _dump_reply_failure_diag(page, tweet_url)
1155
+ print(f"[reply_to_tweet] reply_box_not_found diag: "
1156
+ f"{json.dumps(diag, default=str)}", file=sys.stderr)
1157
+ dom = diag.get("dom") or {}
1158
+ # Classify WHY the composer is missing so the poster can suppress
1159
+ # PERMANENT conditions (never re-attempt) vs retry TRANSIENT ones:
1160
+ # - reply_restricted: author limits who can reply -> permanent,
1161
+ # suppress thread + author.
1162
+ # - tweet_unavailable: tweet deleted/suspended (nothing rendered)
1163
+ # -> permanent, suppress thread. A login modal is OUR session
1164
+ # problem, not the tweet's, so it stays transient.
1165
+ # - else: composer just didn't mount -> transient, retry as before.
1166
+ if dom.get("reply_restricted"):
1167
+ return {"ok": False, "error": "reply_restricted",
1168
+ "restriction_label": dom.get("restriction_label") or "",
1169
+ "diag": diag}
1170
+ if not dom.get("tweet_rendered") and not dom.get("has_login_modal"):
1171
+ return {"ok": False, "error": "tweet_unavailable", "diag": diag}
1172
+ return {"ok": False, "error": "reply_box_not_found", "diag": diag}
1173
+
1174
+ # Snapshot our reply links right before posting (to detect the new one)
1175
+ links_before = _collect_our_reply_links(page)
1176
+
1177
+ # Click and type the reply
1178
+ reply_box.click()
1179
+ page.wait_for_timeout(500)
1180
+ page.keyboard.type(text, delay=10)
1181
+ page.wait_for_timeout(1000)
1182
+
1183
+ # Click the Reply submit button. MUST target tweetButtonInline by
1184
+ # testid; substring-matching "Reply" by accessible name matches
1185
+ # every reply-icon on the page and picks the wrong one.
1186
+ try:
1187
+ reply_btn = page.locator('[data-testid="tweetButtonInline"]').first
1188
+ reply_btn.wait_for(state="visible", timeout=5000)
1189
+ for _ in range(20):
1190
+ if reply_btn.get_attribute("aria-disabled") != "true":
1191
+ break
1192
+ page.wait_for_timeout(100)
1193
+ reply_btn.click()
1194
+ except Exception:
1195
+ page.keyboard.press("Meta+Enter")
1196
+
1197
+ # Post-submit settle: lets the CDP network response (which carries
1198
+ # the new tweet id -> reply_url, captured below) and the success
1199
+ # interstitial arrive. Trimmed from 4000ms 2026-06-22; the DOM-diff
1200
+ # fallback (3x2s, below) still covers a slow CDP response, so the
1201
+ # reply_url is not lost if 2000ms is short on a given run.
1202
+ page.wait_for_timeout(2000)
1203
+
1204
+ # Verify: check if the reply box is empty (cleared after posting)
1205
+ try:
1206
+ box_text = reply_box.text_content() or ""
1207
+ verified = len(box_text.strip()) == 0 or text not in box_text
1208
+ except Exception:
1209
+ verified = True
1210
+
1211
+ # Dismiss the post-success interstitial X shows right after a reply
1212
+ # (e.g. the "Unlock more on X" graduated-access sheet). It animates
1213
+ # in on top of the composer once the reply lands, so we close it
1214
+ # here, immediately after the post succeeds, rather than before the
1215
+ # next reply -> the sheet never lingers on screen and never masks
1216
+ # the next reply box. Best-effort, fast, never raises.
1217
+ _dismiss_known_overlays(page)
1218
+
1219
+ # Clean up CDP session
1220
+ if _cdp_session:
1221
+ try:
1222
+ _cdp_session.detach()
1223
+ except Exception:
1224
+ pass
1225
+
1226
+ # Capture reply URL
1227
+ reply_url = None
1228
+
1229
+ # Method 1: CDP network interception (most reliable)
1230
+ if _created_tweet_ids:
1231
+ reply_url = f"https://x.com/{_handle}/status/{_created_tweet_ids[-1]}"
1232
+ print(f"[reply_url] captured via CDP+response-listener: {reply_url}", file=sys.stderr)
1233
+
1234
+ # Method 2: DOM diff (check if new reply links appeared)
1235
+ if not reply_url:
1236
+ for attempt in range(3):
1237
+ links_after = _collect_our_reply_links(page)
1238
+ new_links = links_after - links_before
1239
+ if new_links:
1240
+ reply_path = max(new_links, key=lambda x: int(re.search(r'/status/(\d+)', x).group(1)))
1241
+ reply_url = f"https://x.com{reply_path}" if not reply_path.startswith("http") else reply_path
1242
+ break
1243
+ page.wait_for_timeout(2000)
1244
+
1245
+ # Method 3 REMOVED 2026-05-01: profile-page (`/with_replies`)
1246
+ # scrape was returning the wrong URL under parallel cycles. It
1247
+ # picked `max(status_id)` of any m13v_ reply on the profile page
1248
+ # and de-duped against a shared `/tmp` tracker file, but with
1249
+ # multiple cycles posting in parallel that "latest" reply often
1250
+ # belonged to a DIFFERENT thread than the one we just posted to.
1251
+ # Observed cross-thread contamination on 2026-05-01: cycles
1252
+ # 074506 and 080006 both captured 2050228098633982405 as "their"
1253
+ # reply URL but for different parent tweets. Better to leave
1254
+ # reply_url=None and let the caller treat it as soft-skip than
1255
+ # to attribute someone else's tweet to this candidate's row.
1256
+ if reply_url:
1257
+ print(f"[reply_url] found: {reply_url}", file=sys.stderr)
1258
+ else:
1259
+ print("[reply_url] capture failed (CDP+DOM both empty); "
1260
+ "returning null — caller should skip without retry",
1261
+ file=sys.stderr)
1262
+
1263
+ # Snapshot the single best-performing human reply on this thread
1264
+ # AT post-success time. The page is already on the candidate
1265
+ # thread URL with replies visible (we just posted there). We
1266
+ # filter out our own reply and the thread author, sort by likes,
1267
+ # and keep only the top one. Failures are swallowed: an empty
1268
+ # top_replies list is the correct downstream signal ("nothing
1269
+ # to track").
1270
+ #
1271
+ # Three-layer defense against X's "Discover more" /
1272
+ # "More replies" suggested-content cards, which render as
1273
+ # full article elements right alongside real replies and used to
1274
+ # leak in as the "top" reply (e.g. @mntruell 1343 likes on a
1275
+ # @zhenthebuilder thread, @OpenAIDevs 4050 likes on a @kr0der
1276
+ # thread — both viral standalone tweets X surfaced as
1277
+ # "discover more", neither was an actual reply). Layers:
1278
+ # (1) DOM-position boundary: stop iterating at the first
1279
+ # "Discover more" / "More replies" heading.
1280
+ # (2) Snowflake age: real replies must be POSTED AFTER the
1281
+ # thread, so reply_tweet_id > thread_tweet_id.
1282
+ # (3) Quoted-tweet embeds: skip articles nested inside
1283
+ # another article (rare but possible source of leaks).
1284
+ top_replies = []
1285
+ try:
1286
+ self_handle = (our_handle() or "").lower().lstrip("@")
1287
+ m_author = re.search(r"(?:x|twitter)\.com/([^/]+)/status/(\d+)", tweet_url)
1288
+ thread_author_handle = (m_author.group(1).lower() if m_author else "")
1289
+ thread_tweet_id = (m_author.group(2) if m_author else "")
1290
+ scrape_js = """
1291
+ (() => {
1292
+ const headings = Array.from(document.querySelectorAll('div, h2, [role="heading"]'))
1293
+ .filter(el => {
1294
+ const t = (el.textContent || '').trim();
1295
+ return t === 'Discover more' || t === 'More replies' || t === 'Show more replies';
1296
+ });
1297
+ const articles = Array.from(document.querySelectorAll('article[data-testid="tweet"]'));
1298
+ if (articles.length < 1) return JSON.stringify({replies: [], article_count: articles.length, dropped_after_discover: 0, dropped_nested: 0});
1299
+ let dropped_after_discover = 0, dropped_nested = 0;
1300
+ const replyArticles = articles.slice(1, 31);
1301
+ const replies = [];
1302
+ for (const art of replyArticles) {
1303
+ try {
1304
+ // Layer 1: hard boundary at "Discover more" heading.
1305
+ // headings[0] is the FIRST such heading on the page;
1306
+ // any article after it is a suggested-content card.
1307
+ if (headings.length > 0) {
1308
+ const cmp = art.compareDocumentPosition(headings[0]);
1309
+ if (!(cmp & Node.DOCUMENT_POSITION_FOLLOWING)) {
1310
+ dropped_after_discover += 1;
1311
+ continue;
1312
+ }
1313
+ }
1314
+ // Layer 3: skip quoted-tweet embeds (nested article).
1315
+ let p = art.parentElement, nested = false;
1316
+ while (p) { if (p.tagName === 'ARTICLE') { nested = true; break; } p = p.parentElement; }
1317
+ if (nested) { dropped_nested += 1; continue; }
1318
+
1319
+ const linkEls = art.querySelectorAll('a[href*="/status/"]');
1320
+ let reply_url = null;
1321
+ for (const a of linkEls) {
1322
+ const m = a.getAttribute('href').match(/^\\/[^/]+\\/status\\/\\d+$/);
1323
+ if (m) { reply_url = 'https://x.com' + a.getAttribute('href'); break; }
1324
+ }
1325
+ if (!reply_url) continue;
1326
+ const tid_m = reply_url.match(/\\/status\\/(\\d+)/);
1327
+ const reply_tweet_id = tid_m ? tid_m[1] : null;
1328
+ const handle_m = reply_url.match(/x\\.com\\/([^/]+)\\/status/);
1329
+ const reply_author_handle = handle_m ? handle_m[1] : null;
1330
+ const userName = art.querySelector('[data-testid="User-Name"]');
1331
+ const reply_author = userName ? (userName.textContent || '').trim().slice(0, 80) : null;
1332
+ const textEl = art.querySelector('[data-testid="tweetText"]');
1333
+ const reply_content = textEl ? (textEl.textContent || '').trim().slice(0, 500) : null;
1334
+ const groupEl = art.querySelector('[role="group"][aria-label]');
1335
+ let likes = 0, replies_count = 0, retweets = 0, views = 0;
1336
+ if (groupEl) {
1337
+ const label = groupEl.getAttribute('aria-label') || '';
1338
+ const lm = label.match(/(\\d[\\d,]*)\\s+(?:Like|Likes)/i);
1339
+ const rm = label.match(/(\\d[\\d,]*)\\s+(?:Reply|Replies)/i);
1340
+ const tm = label.match(/(\\d[\\d,]*)\\s+(?:Repost|Reposts)/i);
1341
+ const vm = label.match(/(\\d[\\d,]*)\\s+(?:View|Views)/i);
1342
+ likes = lm ? parseInt(lm[1].replace(/,/g, ''), 10) : 0;
1343
+ replies_count = rm ? parseInt(rm[1].replace(/,/g, ''), 10) : 0;
1344
+ retweets = tm ? parseInt(tm[1].replace(/,/g, ''), 10) : 0;
1345
+ views = vm ? parseInt(vm[1].replace(/,/g, ''), 10) : 0;
1346
+ }
1347
+ // Link detection. Twitter exclusively shortens external
1348
+ // links through t.co, so any <a href="https://t.co/..."]>
1349
+ // inside the article (excluding any nested article like
1350
+ // a quoted tweet) means the reply author posted an
1351
+ // outbound link. Pick the first matching anchor whose
1352
+ // nearest ancestor article IS this article (rules out
1353
+ // links embedded inside a quoted-tweet block).
1354
+ let reply_link_url = null;
1355
+ let reply_link_display = null;
1356
+ const tcoAnchors = art.querySelectorAll('a[href^="https://t.co/"]');
1357
+ for (const a of tcoAnchors) {
1358
+ let q = a.parentElement, owner = null;
1359
+ while (q) { if (q.tagName === 'ARTICLE') { owner = q; break; } q = q.parentElement; }
1360
+ if (owner === art) {
1361
+ reply_link_url = a.getAttribute('href');
1362
+ // The anchor's textContent is the unrolled display
1363
+ // URL twitter shows the reader (e.g. "deno.com/blog
1364
+ // /agents-deploy"). Strip whitespace + Unicode
1365
+ // ellipsis that x.com inserts on long display URLs.
1366
+ reply_link_display = ((a.textContent || '').trim()).slice(0, 500) || null;
1367
+ break;
1368
+ }
1369
+ }
1370
+ replies.push({reply_url, reply_tweet_id, reply_author_handle, reply_author, reply_content, likes, replies: replies_count, retweets, views, reply_link_url, reply_link_display});
1371
+ } catch (e) {}
1372
+ }
1373
+ return JSON.stringify({replies, article_count: articles.length, dropped_after_discover, dropped_nested, headings_found: headings.length});
1374
+ })()
1375
+ """
1376
+ raw = page.evaluate(scrape_js)
1377
+ parsed = json.loads(raw) if isinstance(raw, str) else (raw or {})
1378
+ all_replies = parsed.get("replies", []) or []
1379
+ dropped_older = 0
1380
+ filtered = []
1381
+ for r in all_replies:
1382
+ h = (r.get("reply_author_handle") or "").lower().lstrip("@")
1383
+ if not h:
1384
+ continue
1385
+ if self_handle and h == self_handle:
1386
+ continue
1387
+ if thread_author_handle and h == thread_author_handle:
1388
+ continue
1389
+ # Layer 2: snowflake age. A real reply MUST have been
1390
+ # posted after the thread; older snowflakes are
1391
+ # quoted-tweet embeds or suggested-content leaks that
1392
+ # somehow made it past the DOM boundary.
1393
+ rtid = (r.get("reply_tweet_id") or "").strip()
1394
+ if thread_tweet_id and rtid:
1395
+ try:
1396
+ if int(rtid) <= int(thread_tweet_id):
1397
+ dropped_older += 1
1398
+ continue
1399
+ except ValueError:
1400
+ pass
1401
+ filtered.append(r)
1402
+ filtered.sort(key=lambda r: int(r.get("likes") or 0), reverse=True)
1403
+ # Two-row snapshot strategy (2026-05-22):
1404
+ # rank=1 = top reply by likes regardless of link presence
1405
+ # (the existing "what's winning here?" benchmark).
1406
+ # rank=2 = top *link-bearing* reply, if one exists and is
1407
+ # distinct from rank=1. This gives us an
1408
+ # apples-to-apples comparison against our own
1409
+ # link-bearing posts. ~96% of top replies don't
1410
+ # include a link, so without this second row the
1411
+ # benchmark population was too small.
1412
+ # If rank=1 already has a link, the rank=2 candidate is the
1413
+ # same row and we skip it to honor UNIQUE(post_id, reply_url).
1414
+ top_replies = []
1415
+ if filtered:
1416
+ primary = filtered[0]
1417
+ top_replies.append(primary)
1418
+ primary_url = primary.get("reply_url")
1419
+ if not primary.get("reply_link_url"):
1420
+ for cand in filtered[1:]:
1421
+ if cand.get("reply_link_url") and cand.get("reply_url") != primary_url:
1422
+ top_replies.append(cand)
1423
+ break
1424
+ print(f"[top_replies] scraped {len(all_replies)} articles "
1425
+ f"(headings={parsed.get('headings_found', 0)}, "
1426
+ f"dropped_after_discover={parsed.get('dropped_after_discover', 0)}, "
1427
+ f"dropped_nested={parsed.get('dropped_nested', 0)}, "
1428
+ f"dropped_older={dropped_older}), "
1429
+ f"kept top {len(top_replies)} after self+author filter "
1430
+ f"(rank2_has_link={'yes' if len(top_replies) > 1 else 'no'})",
1431
+ file=sys.stderr)
1432
+ except Exception as e:
1433
+ print(f"[top_replies] scrape failed: {e}", file=sys.stderr)
1434
+ top_replies = []
1435
+
1436
+ # Like the parent tweet we just replied to. Deterministic: fires on
1437
+ # EVERY successful reply. The page is still on the thread, so the
1438
+ # parent is the first article and no extra navigation is needed.
1439
+ # Wrapped so a like failure can NEVER fail the reply itself — we
1440
+ # carry the outcome out in `like_result` for the caller to log.
1441
+ like_result = {"ok": False, "error": "not_attempted"}
1442
+ try:
1443
+ like_result = _like_first_tweet_on_page(page)
1444
+ except Exception as _le:
1445
+ like_result = {"ok": False, "error": str(_le)}
1446
+ print(f"[like] unexpected error in reply_to_tweet: {_le}", file=sys.stderr)
1447
+
1448
+ return {
1449
+ "ok": True,
1450
+ "tweet_url": tweet_url,
1451
+ "reply_url": reply_url,
1452
+ "verified": verified,
1453
+ "applied_campaigns": applied_campaigns,
1454
+ "final_text": text,
1455
+ "top_replies": top_replies,
1456
+ "liked": bool(like_result.get("liked") or like_result.get("already_liked")),
1457
+ "like_result": like_result,
1458
+ }
1459
+
1460
+ finally:
1461
+ if not is_cdp:
1462
+ page.close()
1463
+ browser.close()
1464
+
1465
+
1466
def unread_dms():
    """Collect the conversations listed in the Twitter/X DM inbox.

    Opens https://x.com/i/chat, runs the DM passcode handler, then
    repeatedly scrapes the conversation sidebar while scrolling it so
    that rows beyond the initially rendered window are picked up too.

    Returns:
        On success, a list with one dict per conversation, de-duplicated
        by ``thread_url`` (the first sighting of a thread wins):

            {"author": str, "handle": str, "preview": str, "time": str,
             "thread_url": str, "is_from_us": bool, "has_unread": bool}

        ``has_unread`` is the field callers should filter on. The page
        script sets it from the sidebar's visual state: the blue
        ``icon-circle-fill`` SVG dot, a preview font weight above 400,
        an aria-label containing "unread", or a small square leaf with a
        non-transparent background. It is forced to ``False`` when a
        "You" / "You:" span shows that we sent the last visible message.

        On failure the return value is NOT a list:
          * whatever ``_rate_limit_response(...)`` returns when
            ``_is_x_unreachable`` reports the page as unreachable, or
            when nothing was scraped and at least three 429 responses
            were counted;
          * ``{"ok": False, "error": "not_on_dm_page", "url": ...}`` when
            the browser did not end up on a chat URL.

    Environment:
        TWITTER_UNREAD_SCROLL_MAX_ITERS: cap on scrape/scroll rounds
            (default 60).
        TWITTER_UNREAD_SCROLL_NO_GROWTH: consecutive rounds without a
            new thread that end the scan (default 5).
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as pw:
        browser, page, is_cdp = get_browser_and_page(pw)

        try:
            throttle_stats = _install_rate_limit_listener(page)
            page.goto("https://x.com/i/chat", wait_until="domcontentloaded")
            page.wait_for_timeout(5000)

            blocked, why = _is_x_unreachable(page)
            if blocked:
                return _rate_limit_response(why, throttle_stats, page.url)

            # The inbox may sit behind the DM passcode prompt.
            _handle_dm_passcode(page)
            page.wait_for_timeout(2000)

            # Make sure we actually landed on the inbox.
            if "chat" not in page.url:
                blocked, why = _is_x_unreachable(page)
                if blocked:
                    return _rate_limit_response(why, throttle_stats, page.url)
                return {"ok": False, "error": "not_on_dm_page", "url": page.url}

            # Sidebar scraper, run inside the page. Background (sidebar
            # redesign observed 2026-05-14): the list became virtualized
            # (only ~14-18 rows are rendered at a time) and the unread
            # cues changed -- preview weight dropped from >=600 to 500,
            # the unread dot became an 8x8 <svg data-icon="icon-circle-fill">
            # with a transparent background, and the "unread" aria-label
            # disappeared. Rows carry data-testid
            # `dm-conversation-item-<ids>`. The script therefore checks
            # the SVG dot first, then any preview weight above 400, and
            # keeps the older signals as a fallback.
            scrape_js = """() => {
                const results = [];
                const items = document.querySelectorAll(
                    '[data-testid^="dm-conversation-item-"], main li, main [role="listitem"]'
                );

                for (const item of items) {
                    const link = item.querySelector('a[href*="/i/chat/"]');
                    if (!link) continue;

                    const threadUrl = link.href;
                    if (!threadUrl.match(/\\/i\\/chat\\/[\\d-g]/)) continue;

                    let handle = '';
                    const avatarLink = item.querySelector('a[href^="https://x.com/"]');
                    if (avatarLink) {
                        const href = avatarLink.getAttribute('href') || '';
                        const m = href.match(/x\\.com\\/([^/]+)/);
                        if (m) handle = m[1];
                    }

                    const leaves = [];
                    const all = link.querySelectorAll('*');
                    for (const el of all) {
                        if (el.children.length !== 0) continue;
                        const t = (el.textContent || '').trim();
                        if (!t) continue;
                        const fw = parseInt(window.getComputedStyle(el).fontWeight, 10) || 400;
                        leaves.push({tag: el.tagName.toLowerCase(), fw: fw, t: t});
                    }

                    let author = '';
                    let time = '';
                    let preview = '';
                    let isFromUs = false;
                    let previewFw = 400;

                    for (const node of leaves) {
                        if (!author && node.fw >= 700 && node.t.length < 80 &&
                            !/^(\\d+[hmd]|\\d+w|Just now)$/.test(node.t)) {
                            author = node.t;
                            continue;
                        }
                        if (!time && /^(\\d+[hmd]|\\d+w|Just now)$/.test(node.t)) {
                            time = node.t;
                            continue;
                        }
                        if (!isFromUs && node.tag === 'span' && /^You:?$/.test(node.t)) {
                            isFromUs = true;
                            continue;
                        }
                        if (!preview && node.t.length > 0) {
                            preview = node.t;
                            previewFw = node.fw;
                        }
                    }

                    // Primary: <svg data-icon="icon-circle-fill"> = blue unread dot.
                    let hasUnread = !!item.querySelector('svg[data-icon="icon-circle-fill"]');

                    // Secondary: any non-400 weight on the preview leaf (X
                    // currently uses 500 for unread; we accept >400 in case
                    // they tweak it again).
                    if (!hasUnread && previewFw > 400) hasUnread = true;

                    // Tertiary legacy signals (kept for safety).
                    if (!hasUnread && item.querySelector('[aria-label*="unread" i]')) {
                        hasUnread = true;
                    }
                    if (!hasUnread) {
                        const candidates = item.querySelectorAll('span, div');
                        for (const el of candidates) {
                            if (el.children.length !== 0) continue;
                            const style = window.getComputedStyle(el);
                            const bg = style.backgroundColor || '';
                            if (!bg || bg === 'rgba(0, 0, 0, 0)' || bg === 'transparent') continue;
                            const w = el.offsetWidth, h = el.offsetHeight;
                            if (w > 0 && w <= 14 && h > 0 && h <= 14 && Math.abs(w - h) <= 2) {
                                hasUnread = true;
                                break;
                            }
                        }
                    }

                    // If we sent the last visible message ("You:" prefix), it
                    // can't be unread on our end regardless of bolding.
                    if (isFromUs) hasUnread = false;

                    if (author || handle) {
                        results.push({
                            author: author,
                            handle: handle,
                            preview: preview,
                            time: time,
                            thread_url: threadUrl,
                            is_from_us: isFromUs,
                            has_unread: hasUnread,
                        });
                    }
                }

                return results;
            }"""

            # Scrolls the last rendered conversation row into view so the
            # virtualized list mounts the next batch. Its return value
            # (scrollTop of the nearest scrollable ancestor, 0, or -1
            # when no row exists) is not used below.
            scroll_js = """() => {
                const items = document.querySelectorAll(
                    '[data-testid^="dm-conversation-item-"], main li, main [role="listitem"]'
                );
                let last = null;
                for (const item of items) {
                    if (item.querySelector('a[href*="/i/chat/"]')) last = item;
                }
                if (!last) return -1;
                last.scrollIntoView({behavior: 'instant', block: 'end'});
                let el = last;
                while (el) {
                    const s = window.getComputedStyle(el);
                    if ((s.overflowY === 'auto' || s.overflowY === 'scroll') &&
                        el.scrollHeight > el.clientHeight) {
                        return el.scrollTop;
                    }
                    el = el.parentElement;
                }
                return 0;
            }"""

            scroll_cap = int(os.environ.get("TWITTER_UNREAD_SCROLL_MAX_ITERS", "60"))
            idle_cap = int(os.environ.get("TWITTER_UNREAD_SCROLL_NO_GROWTH", "5"))

            # thread_url -> first row scraped for that thread.
            conversations = {}
            idle_rounds = 0
            for _ in range(scroll_cap):
                known_before = len(conversations)
                for row in page.evaluate(scrape_js):
                    conversations.setdefault(row["thread_url"], row)

                # A round is "idle" when it surfaced no thread we had not
                # already recorded; any growth resets the streak.
                if len(conversations) > known_before:
                    idle_rounds = 0
                else:
                    idle_rounds += 1
                if idle_rounds >= idle_cap:
                    break

                page.evaluate(scroll_js)
                page.wait_for_timeout(600)

            threads = list(conversations.values())

            # An empty inbox combined with repeated 429s is reported as a
            # rate limit so the caller can back off, rather than as a
            # genuine "no conversations" result.
            if not threads and throttle_stats["429"] >= 3:
                return _rate_limit_response(
                    "inbox_api_throttled", throttle_stats, page.url
                )

            return threads

        finally:
            # Pages/browsers attached over CDP are left open; only the
            # ones this call owns are torn down.
            if not is_cdp:
                page.close()
                browser.close()
1685
+
1686
+
1687
def read_conversation(thread_url, max_messages=20):
    """Read the most recent messages of one Twitter/X DM conversation.

    Points the page at ``thread_url``, runs the DM passcode handler, and
    then extracts partner info and messages with an in-page script.

    Args:
        thread_url: URL of the DM thread to open. It is handed to the
            page as a script *argument*, never spliced into JavaScript
            source, so quotes or backslashes in it cannot alter the
            script that runs in the logged-in session.
        max_messages: How many of the newest extracted messages to
            return (taken with ``messages.slice(-maxMessages)``).
            NOTE(review): with ``0`` the JS slice becomes ``slice(-0)``,
            which returns every message rather than none -- confirm
            whether any caller relies on that.

    Returns:
        On success::

            {"partner_name": str, "partner_handle": str,
             "messages": [{"sender": str, "content": str, "time": str,
                           "is_from_us": bool}, ...],
             "total_found": int}

        ``total_found`` counts all extracted messages before the
        ``max_messages`` cut. ``sender`` is ``'us'`` for our messages,
        otherwise the partner name, partner handle, or ``'them'``.

        When ``_is_x_unreachable`` flags the page, the result of
        ``_rate_limit_response(...)`` is returned instead.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            rl_counter = _install_rate_limit_listener(page)
            # Navigate using JS to avoid SPA navigation timeouts. The URL
            # travels as the evaluate() argument: interpolating it into
            # the script text (as an f-string) would let a quote or
            # backslash in the URL break out of the string literal and
            # execute arbitrary JS in the page.
            page.evaluate("(url) => { window.location.href = url; }", thread_url)
            page.wait_for_timeout(6000)

            unreachable, reason = _is_x_unreachable(page)
            if unreachable:
                return _rate_limit_response(reason, rl_counter, page.url)

            # Handle DM passcode if needed
            _handle_dm_passcode(page)
            page.wait_for_timeout(2000)

            # In-page extraction. Receives {maxMessages, ourHandle} and
            # returns the dict described in the docstring.
            result = page.evaluate("""(params) => {
                const maxMessages = params.maxMessages;
                const ourHandle = params.ourHandle;

                let partnerName = '';
                let partnerHandle = '';
                const main = document.querySelector('main');
                if (!main) return {partner_name: '', partner_handle: '', messages: [], total_found: 0};

                // Find the conversation panel (the section containing the
                // message textbox), NOT the sidebar conversation list.
                // The textbox has aria-label like "Unencrypted message".
                const textbox = main.querySelector('[role="textbox"]');
                // Walk up from textbox to find the conversation container
                // that holds the message list items.
                let convPanel = null;
                if (textbox) {
                    // The conversation panel is typically a sibling of or
                    // ancestor of the textbox container. Walk up to find
                    // the div that contains BOTH the message list and textbox.
                    let el = textbox;
                    for (let i = 0; i < 10; i++) {
                        el = el.parentElement;
                        if (!el) break;
                        const lis = el.querySelectorAll('li, [role="listitem"]');
                        if (lis.length >= 2) {
                            convPanel = el;
                            break;
                        }
                    }
                }

                // Fallback: if no textbox found, try to find the panel
                // that has "View Profile" text (the conversation header)
                if (!convPanel) {
                    const allDivs = main.querySelectorAll('div');
                    for (const d of allDivs) {
                        if (d.textContent.includes('View Profile') &&
                            d.textContent.includes('Joined ') &&
                            d.querySelectorAll('li').length >= 2) {
                            convPanel = d;
                            break;
                        }
                    }
                }

                // Last fallback: use main but filter out sidebar items
                if (!convPanel) convPanel = main;

                // Extract partner info from profile card in the conversation
                const profileLink = convPanel.querySelector('a[href*="x.com/"]');
                if (profileLink) {
                    const href = profileLink.getAttribute('href') || '';
                    const m = href.match(/x\\.com\\/([^/]+)/);
                    if (m && m[1] !== ourHandle) partnerHandle = m[1];
                }

                // Look for @handle text
                const handleEls = convPanel.querySelectorAll('div, span');
                for (const el of handleEls) {
                    const t = el.textContent.trim();
                    if (t.startsWith('@') && t.length > 2 && t.length < 50 &&
                        !t.includes(' ') && t.substring(1) !== ourHandle) {
                        partnerHandle = t.substring(1);
                        break;
                    }
                }

                // Find messages — only from the conversation panel
                const items = convPanel.querySelectorAll('li, [role="listitem"]');
                const messages = [];
                let currentDate = '';

                for (const item of items) {
                    const text = item.textContent || '';

                    // Skip sidebar conversation items (they contain
                    // avatar links to x.com/username profiles)
                    const sidebarLink = item.querySelector('a[href*="/i/chat/"]');
                    if (sidebarLink) continue;

                    // Date separator
                    if (text.match(/^(Mon|Tue|Wed|Thu|Fri|Sat|Sun|Today|Yesterday)/) &&
                        text.length < 30) {
                        currentDate = text.trim();
                        continue;
                    }

                    // Profile card
                    if (text.includes('View Profile') || text.includes('Joined ')) {
                        const nameEl = item.querySelector('div[dir="ltr"], span');
                        if (nameEl && !partnerName) {
                            const n = nameEl.textContent.trim();
                            if (n && n.length > 1 && n.length < 50 &&
                                !n.startsWith('@') && !n.includes('View') &&
                                !n.includes('Joined')) {
                                partnerName = n;
                            }
                        }
                        continue;
                    }

                    if (text.trim().length < 2) continue;

                    // Extract message content and time
                    let content = '';
                    let time = '';
                    let isFromUs = false;

                    const timeMatch = text.match(/(\\d{1,2}:\\d{2}\\s*[AP]M)/);
                    if (timeMatch) {
                        time = timeMatch[1];
                    }

                    // Content: find the deepest div with message text
                    const contentDivs = item.querySelectorAll('div');
                    for (const cd of contentDivs) {
                        const t = cd.textContent.trim();
                        if (t.match(/^\\d{1,2}:\\d{2}\\s*[AP]M$/)) continue;
                        if (t === time) continue;
                        if (t.length > 2 && t.length < 5000 &&
                            !t.includes('View Profile') && !t.includes('Joined ')) {
                            const childDivs = cd.querySelectorAll('div');
                            if (childDivs.length <= 2) {
                                content = t.replace(/(\\d{1,2}:\\d{2}\\s*[AP]M)/g, '').trim();
                                if (content.length > 0) break;
                            }
                        }
                    }

                    if (!content || content.length < 1) continue;

                    // Determine isFromUs via multiple signals. The previous
                    // heuristic (any SVG present => ours) misclassified inbound
                    // messages that contained a link-preview card, because the
                    // card itself renders SVG icons (GitHub logo, external-link
                    // glyph, etc.). See DM #1486 / session d986d23e where an
                    // inbound "U can check its open source" + auto-unfurled
                    // GitHub card was labeled as ours and the agent then
                    // reconciled to DB with a bare-URL outbound.
                    //
                    // Signal 1 (strong): delivery receipt text. Seen/Delivered/
                    // Sent only render on our outgoing messages.
                    let hasStatusText = false;
                    const statusCandidates = item.querySelectorAll('span, div');
                    for (const s of statusCandidates) {
                        const t = (s.textContent || '').trim();
                        if (t === 'Seen' || t === 'Delivered' || t === 'Sent') {
                            hasStatusText = true;
                            break;
                        }
                        if (/^Seen\\s+\\d/.test(t) || /^Delivered\\s+\\d/.test(t)) {
                            hasStatusText = true;
                            break;
                        }
                    }

                    // Signal 2: horizontal alignment. X right-aligns our bubbles.
                    let hasRightAlign = false;
                    const alignCandidates = item.querySelectorAll('div[style]');
                    for (const a of alignCandidates) {
                        const style = a.getAttribute('style') || '';
                        if (style.indexOf('flex-end') !== -1 ||
                            style.indexOf('justify-content: end') !== -1) {
                            hasRightAlign = true;
                            break;
                        }
                    }

                    // Signal 3 (fallback): SVG presence, but only delivery-status
                    // SVGs. Exclude SVGs inside <a>, inside card/article wrappers,
                    // and inside any element that also contains an <img>
                    // (all strong tells of a link-preview, not a receipt).
                    let hasDeliverySvg = false;
                    const allSvgs = item.querySelectorAll('svg');
                    for (const svg of allSvgs) {
                        if (svg.closest('a')) continue;
                        if (svg.closest('article')) continue;
                        if (svg.closest('[data-testid*="card"]')) continue;
                        if (svg.closest('[role="link"]')) continue;
                        const wrapperWithImg = svg.closest('div');
                        if (wrapperWithImg && wrapperWithImg.querySelector('img')) continue;
                        hasDeliverySvg = true;
                        break;
                    }

                    isFromUs = hasStatusText || hasRightAlign || hasDeliverySvg;

                    messages.push({
                        sender: isFromUs ? 'us' : partnerName || partnerHandle || 'them',
                        content: content,
                        time: currentDate ? currentDate + ' ' + time : time,
                        is_from_us: isFromUs,
                    });
                }

                const recent = messages.slice(-maxMessages);

                return {
                    partner_name: partnerName,
                    partner_handle: partnerHandle,
                    messages: recent,
                    total_found: messages.length,
                };
            }""", {"maxMessages": max_messages, "ourHandle": our_handle()})

            return result

        finally:
            # Pages/browsers attached over CDP are left open; only the
            # ones this call owns are torn down.
            if not is_cdp:
                page.close()
                browser.close()
1928
+
1929
+
1930
def send_dm(thread_url, message, dm_id=None):
    """Send a message in a Twitter/X DM conversation.

    Navigates to the thread URL, types the message in the compose box,
    and sends it with Enter.

    Processing order:
      1. When ``dm_id`` is given, every URL in ``message`` is wrapped through
         ``dm_short_links.wrap_text``; a failed wrap aborts the send.
      2. Each active Twitter campaign's ``suffix`` is appended to ``message``
         (one ``sample_rate`` coin flip per campaign) before typing, so the
         literal text is what gets delivered.
      3. After a verified send with a ``dm_id``, the message is logged via
         ``_log_twitter_dm_outbound``. Without ``dm_id`` the suffix still
         applies but no outbound log is written.

    Args:
        thread_url: URL of the DM conversation; its last path segment is
            treated as the conversation id.
        message: Text to send.
        dm_id: Optional DM row id used for link wrapping and outbound logging.

    Returns:
        On a completed attempt:
            {"ok": bool, "thread_url": "...", "verified": bool,
             "error": None | "send_unverified_no_dom_confirmation",
             "applied_campaigns": [...], "minted_link_codes": [...],
             "message_sent": "..."}
        On early failure: {"ok": False, "error": "...", ...} or whatever
        ``_rate_limit_response`` returns.
    """
    # Tool-level URL wrap pass: every URL in the model's message gets minted
    # through dm_short_links.wrap_text so clicks attribute to this DM. Runs
    # BEFORE campaign-suffix injection. Refuses if any URL points at a project
    # not in dms.target_projects[]; the pipeline must set-target-project
    # --append before retrying.
    minted_link_codes = []
    if dm_id is not None:
        from dm_short_links import wrap_text as _wrap_text
        wrap_res = _wrap_text(dm_id=dm_id, text=message)
        if not wrap_res.get("ok"):
            return {
                "ok": False,
                "error": "link_wrap_failed",
                "wrap_error": wrap_res.get("error"),
                "needed_project": wrap_res.get("needed_project"),
                "url": wrap_res.get("url"),
            }
        message = wrap_res["text"]
        # Copy so later .extend() never mutates the wrapper's own list, and
        # tolerate an explicit None value.
        minted_link_codes = list(wrap_res.get("minted_codes") or [])

    applied_campaigns = []
    for cid, suffix, sample_rate in _load_active_twitter_campaigns():
        if random.random() < sample_rate:
            # Wrap any URLs in the suffix through dm_short_links (DM rail) so
            # clicks attribute to this DM. Falls back to raw suffix if dm_id
            # missing or wrap fails (e.g. plain-text suffix " written with ai").
            wrapped_suffix = suffix
            if 'http' in suffix and dm_id is not None:
                try:
                    from dm_short_links import wrap_text as _wrap_text_dm
                    wrap_res2 = _wrap_text_dm(dm_id=dm_id, text=suffix)
                    if wrap_res2.get('ok') and wrap_res2.get('minted_codes'):
                        wrapped_suffix = wrap_res2['text']
                        minted_link_codes.extend(wrap_res2.get('minted_codes', []))
                        print(f"[send_dm] suffix wrap codes={wrap_res2['minted_codes']}",
                              file=sys.stderr)
                except Exception as _e:
                    # Best-effort: a failed suffix wrap must not block the send.
                    print(f"[send_dm] suffix wrap failed ({_e}); raw",
                          file=sys.stderr)
            message = message + wrapped_suffix
            applied_campaigns.append(cid)
    print(f"[send_dm] applied_campaigns={applied_campaigns} minted_links={minted_link_codes} message_len={len(message)} dm_id={dm_id}",
          file=sys.stderr)

    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            rl_counter = _install_rate_limit_listener(page)
            # 2026-05-14: navigate directly to the thread URL via JS, mirroring
            # read_conversation. The previous implementation went to /i/chat/
            # first and clicked `a[href*="<conv_id>"]` from the sidebar, but X
            # virtualizes the sidebar so only ~14-18 rows render at once. Any
            # thread below the initial slice (3+ days old, ~20+ position) hit
            # `conversation_not_found_in_sidebar` as a terminal error,
            # producing 0 successful sends on the 19:14 cycle's 11 retries.
            # Direct nav was historically called out as flaky for DM routes;
            # in practice it works fine when given a 6s settle window, which
            # is what read_conversation does.
            conv_id = thread_url.rstrip("/").split("/")[-1]
            # Pass the URL as an argument instead of splicing it into the JS
            # source: a quote or backslash in thread_url can no longer break
            # (or inject into) the evaluated script.
            page.evaluate("(url) => { window.location.href = url; }", thread_url)
            page.wait_for_timeout(6000)

            unreachable, reason = _is_x_unreachable(page)
            if unreachable:
                return _rate_limit_response(reason, rl_counter, page.url)

            # Handle DM passcode if needed
            _handle_dm_passcode(page)
            page.wait_for_timeout(2000)

            # Verify the SPA landed on the right conversation. If the URL
            # doesn't contain the conv_id, something redirected us (login
            # bounce, suspended account, deleted thread, etc.). An empty
            # conv_id would make the substring test vacuously true, so treat
            # it as a failure rather than typing into whatever page is open.
            if not conv_id or conv_id not in page.url:
                return {
                    "ok": False,
                    "error": "thread_url_redirected",
                    "expected_conv_id": conv_id,
                    "landed_url": page.url,
                }

            # Find the message input box: try the known accessible names
            # first, then fall back to the last contenteditable textbox.
            msg_box = None
            for label in ["Unencrypted message", "Start a new message"]:
                try:
                    msg_box = page.get_by_role("textbox", name=label)
                    msg_box.wait_for(state="visible", timeout=5000)
                    break
                except Exception:
                    msg_box = None

            if msg_box is None:
                try:
                    msg_box = page.locator(
                        'div[role="textbox"][contenteditable="true"]'
                    ).last
                    msg_box.wait_for(state="visible", timeout=3000)
                except Exception:
                    return {"ok": False, "error": "message_box_not_found"}

            # Click and type
            msg_box.click()
            page.wait_for_timeout(500)
            page.keyboard.type(message, delay=10)
            page.wait_for_timeout(1000)

            # Send: press Enter (Twitter DMs send on Enter)
            page.keyboard.press("Enter")
            page.wait_for_timeout(2000)

            # Verify: check if the message appears in the conversation.
            # NOTE(review): main.textContent may also include the compose box
            # text; confirm an unsent draft cannot satisfy this check.
            msg_start = message[:50]
            verified = page.evaluate("""(msgStart) => {
                const main = document.querySelector('main');
                if (!main) return false;
                const text = main.textContent || '';
                return text.includes(msgStart);
            }""", msg_start)

            if verified and dm_id is not None:
                _log_twitter_dm_outbound(dm_id, message, minted_codes=minted_link_codes)

            return {
                "ok": verified,
                "thread_url": page.url,
                "verified": verified,
                "error": None if verified else "send_unverified_no_dom_confirmation",
                "applied_campaigns": applied_campaigns,
                "minted_link_codes": minted_link_codes,
                "message_sent": message,
            }

        finally:
            # A CDP-attached browser is left running; only a browser this
            # call launched is torn down.
            if not is_cdp:
                page.close()
                browser.close()
2087
+
2088
+
2089
def discover_notifications(scroll_count=8, tab="all"):
    """Scrape tweet notifications from x.com/notifications[/{tab}].

    tab:
      "all"      -> /notifications (default; includes replies to our tweets,
                    replies to our replies without @-tag,
                    plus mentions — superset of "mentions")
      "mentions" -> /notifications/mentions (only explicit @-mentions)
      "verified" -> /notifications/verified

    Scrolls the selected tab ``scroll_count`` times and extracts each rendered
    tweet article as a notification record, de-duplicated by ``tweet_id``.
    No API cost (uses the logged-in session via CDP).

    Each record has: tweet_id, handle, display_name, text, tweet_url,
    datetime, replies, retweets, likes, views, bookmarks, replying_to.

    Returns: {"notifications": [...], "total": N, "tab": "..."} or {"error": "..."}
    """
    valid_tabs = {"all": "", "mentions": "/mentions", "verified": "/verified"}
    if tab not in valid_tabs:
        return {"error": f"invalid tab {tab!r}; valid: {sorted(valid_tabs)}"}
    target_url = f"https://x.com/notifications{valid_tabs[tab]}"
    print(f"[twitter_browser] discover_notifications called (scroll_count={scroll_count}, tab={tab}, url={target_url})", file=sys.stderr)
    from playwright.sync_api import sync_playwright

    # Runs in the page. Articles that yield no tweet id or handle are dropped;
    # a per-article try/catch keeps one malformed article from aborting the pass.
    EXTRACTOR_JS = r"""() => {
        const out = [];
        for (const article of document.querySelectorAll('article[data-testid="tweet"]')) {
            try {
                let handle = '';
                let displayName = '';
                for (const link of article.querySelectorAll('a[role="link"]')) {
                    const href = link.getAttribute('href');
                    if (href && href.startsWith('/') && !href.includes('/status/') && !href.includes('/i/') && href.length > 1 && href.split('/').length === 2) {
                        handle = href.replace('/', '');
                        const nameEl = link.querySelector('span');
                        if (nameEl) displayName = nameEl.textContent || '';
                        break;
                    }
                }
                const tweetText = article.querySelector('[data-testid="tweetText"]');
                const text = tweetText ? tweetText.textContent : '';
                const timeEl = article.querySelector('time');
                const timeParent = timeEl ? timeEl.closest('a') : null;
                const tweetHref = timeParent ? timeParent.getAttribute('href') : '';
                const tweetUrl = tweetHref ? ('https://x.com' + tweetHref) : '';
                const datetime = timeEl ? timeEl.getAttribute('datetime') : '';
                const idMatch = tweetHref ? tweetHref.match(/\/status\/(\d+)/) : null;
                const tweetId = idMatch ? idMatch[1] : '';
                let replies=0, retweets=0, likes=0, views=0, bookmarks=0;
                for (const btn of article.querySelectorAll('[role="group"] button, [role="group"] a')) {
                    const al = btn.getAttribute('aria-label') || '';
                    let m;
                    if (m=al.match(/([\d,]+)\s*repl/i)) replies=parseInt(m[1].replace(/,/g,''));
                    if (m=al.match(/([\d,]+)\s*repost/i)) retweets=parseInt(m[1].replace(/,/g,''));
                    if (m=al.match(/([\d,]+)\s*like/i)) likes=parseInt(m[1].replace(/,/g,''));
                    if (m=al.match(/([\d,]+)\s*view/i)) views=parseInt(m[1].replace(/,/g,''));
                    if (m=al.match(/([\d,]+)\s*bookmark/i)) bookmarks=parseInt(m[1].replace(/,/g,''));
                }
                // Detect reply-to target (if tweet is a reply, there's a "Replying to" block).
                // The @handle link is a profile link, so skip status links (same
                // predicate as THREAD_EXTRACTOR_JS).
                let replyingTo = '';
                for (const span of article.querySelectorAll('a[href^="/"]')) {
                    const href = span.getAttribute('href') || '';
                    if (!href.includes('/status/') && span.textContent && span.textContent.trim().startsWith('@')) {
                        replyingTo = span.textContent.trim().replace(/^@/, '');
                        break;
                    }
                }
                if (tweetId && handle) {
                    out.push({
                        tweet_id: tweetId,
                        handle: handle,
                        display_name: displayName.trim(),
                        text: (text || ''),
                        tweet_url: tweetUrl,
                        datetime: datetime,
                        replies: replies, retweets: retweets, likes: likes, views: views, bookmarks: bookmarks,
                        replying_to: replyingTo
                    });
                }
            } catch(e) {}
        }
        return out;
    }"""

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)
        try:
            page.goto(target_url, wait_until="domcontentloaded")
            page.wait_for_timeout(4000)

            seen = set()
            all_tweets = []
            for i in range(scroll_count):
                # Extract before scrolling so the initially rendered rows are
                # captured on the first pass.
                try:
                    new_tweets = page.evaluate(EXTRACTOR_JS)
                except Exception as e:
                    print(f"[notifications] extractor error on scroll {i}: {e}", file=sys.stderr)
                    new_tweets = []
                added = 0
                for t in new_tweets:
                    tid = t.get('tweet_id')
                    if tid and tid not in seen:
                        seen.add(tid)
                        all_tweets.append(t)
                        added += 1
                print(f"[notifications] scroll {i+1}/{scroll_count}: +{added} new, total {len(all_tweets)}", file=sys.stderr)
                page.evaluate("window.scrollBy(0, window.innerHeight * 2)")
                page.wait_for_timeout(1500)
                _refresh_browser_lock()

            return {"notifications": all_tweets, "total": len(all_tweets), "tab": tab}
        finally:
            # Only tear down a browser this call launched; a CDP-attached
            # session is left running.
            if not is_cdp:
                page.close()
                browser.close()
2204
+
2205
+
2206
# Single source of truth for the per-article extractor used by every thread
# reader below (scrape_thread_followups, scrape_many_thread_followups,
# scrape_thread_media, scrape_many_thread_media). Was previously duplicated
# inline in two places, which drifted.
#
# The script runs in the page and returns one record per
# article[data-testid="tweet"] that has both a tweet id and a handle:
#   tweet_id, handle, display_name, text, tweet_url, datetime, replying_to,
#   media, is_repost, reposted_by
#
# `media` is an array [{url, alt, type}] per tweet so the reply-writer can
# "see" images / video / GIF / link-card content instead of replying
# text-blind (2026-06-03 thread-media feature). `type` is image|video|gif|card;
# `alt` is the DOM alt-text / aria-label / card title (empty string when the
# DOM gives none, a flag a later vision pass can escalate on).
#
# `handle` is overridden from the status URL's first path segment when one is
# present, so on a repost it names the author rather than the reposter;
# `is_repost` / `reposted_by` come from the socialContext banner.
#
# Errors inside a single article are swallowed by the per-article try/catch,
# so one malformed article is skipped rather than failing the whole pass.
THREAD_EXTRACTOR_JS = r"""() => {
    function extractMedia(article) {
        const media = [];
        const seen = new Set();
        const push = (url, alt, type) => {
            if (!url || seen.has(url)) return;
            seen.add(url);
            media.push({ url: url, alt: (alt || '').trim(), type: type });
        };
        // Photos and animated GIFs live in tweetPhoto containers. A <video> inside
        // one is an animated GIF; a bare <img> is a still photo.
        for (const ph of article.querySelectorAll('[data-testid="tweetPhoto"]')) {
            const img = ph.querySelector('img');
            const vid = ph.querySelector('video');
            if (vid) {
                const poster = vid.getAttribute('poster') || (img ? img.getAttribute('src') : '') || '';
                const alt = img ? (img.getAttribute('alt') || '') : '';
                // Twitter thumb URLs disambiguate the kind: tweet_video_thumb is an
                // animated GIF; amplify_video_thumb / ext_tw_video_thumb is a real
                // (uploaded) video. Default to video when the pattern is unknown.
                const isGif = /tweet_video_thumb/.test(poster);
                push(poster, alt, isGif ? 'gif' : 'video');
            } else if (img) {
                push(img.getAttribute('src') || '', img.getAttribute('alt') || '', 'image');
            }
        }
        // Inline videos. Use the poster frame as the URL and the aria-label
        // (often a human description) as alt-text.
        for (const vp of article.querySelectorAll('[data-testid="videoPlayer"], [data-testid="videoComponent"]')) {
            const vid = vp.querySelector('video');
            const poster = vid ? (vid.getAttribute('poster') || '') : '';
            push(poster, vp.getAttribute('aria-label') || '', 'video');
        }
        // Link-preview card. URL = card href; alt = card image alt or the first
        // few text spans (title / domain / description).
        const card = article.querySelector('[data-testid="card.wrapper"]');
        if (card) {
            let curl = '';
            const a = card.querySelector('a[href]');
            if (a) curl = a.getAttribute('href') || '';
            let alt = '';
            const cimg = card.querySelector('img');
            if (cimg && cimg.getAttribute('alt')) alt = cimg.getAttribute('alt');
            if (!alt) {
                const txts = [];
                for (const span of card.querySelectorAll('span')) {
                    const t = (span.textContent || '').trim();
                    if (t) txts.push(t);
                }
                alt = txts.slice(0, 3).join(' | ');
            }
            push(curl, alt, 'card');
        }
        return media;
    }
    // Repost detection mirrors extractMedia: read the "<X> reposted" banner from
    // the same already-loaded DOM. socialContext is ALSO used for "Pinned", so
    // match the text /reposted/i, not mere presence. reposted_by = the account
    // whose profile link wraps the banner.
    function extractRepost(article) {
        const sc = article.querySelector('[data-testid="socialContext"]');
        if (!sc || !/\breposted\b/i.test(sc.textContent || '')) {
            return { is_repost: false, reposted_by: '' };
        }
        let reposted_by = '';
        const a = sc.closest('a');
        const rh = a ? (a.getAttribute('href') || '') : '';
        if (rh.startsWith('/') && rh.split('/').length === 2) reposted_by = rh.replace('/', '');
        return { is_repost: true, reposted_by: reposted_by };
    }
    const out = [];
    for (const article of document.querySelectorAll('article[data-testid="tweet"]')) {
        try {
            let handle = '';
            let displayName = '';
            for (const link of article.querySelectorAll('a[role="link"]')) {
                const href = link.getAttribute('href');
                if (href && href.startsWith('/') && !href.includes('/status/') && !href.includes('/i/') && href.length > 1 && href.split('/').length === 2) {
                    handle = href.replace('/', '');
                    const nameEl = link.querySelector('span');
                    if (nameEl) displayName = nameEl.textContent || '';
                    break;
                }
            }
            const tweetText = article.querySelector('[data-testid="tweetText"]');
            const text = tweetText ? tweetText.textContent : '';
            const timeEl = article.querySelector('time');
            const timeParent = timeEl ? timeEl.closest('a') : null;
            const tweetHref = timeParent ? timeParent.getAttribute('href') : '';
            const tweetUrl = tweetHref ? ('https://x.com' + tweetHref) : '';
            const datetime = timeEl ? timeEl.getAttribute('datetime') : '';
            const idMatch = tweetHref ? tweetHref.match(/\/status\/(\d+)/) : null;
            const tweetId = idMatch ? idMatch[1] : '';
            // The status URL's first path segment is the AUTHORITATIVE author. The
            // bare-link scan above grabs the first /handle link, which on a repost is
            // the REPOSTER, not the author. Override from the URL so author + tweet_id
            // always agree (matches twitter_scan.py).
            const authorM = tweetHref ? tweetHref.match(/^\/([^\/]+)\/status\//) : null;
            if (authorM && authorM[1]) handle = authorM[1];
            const repost = extractRepost(article);
            // Detect reply-to target (article with "Replying to" block)
            let replyingTo = '';
            for (const span of article.querySelectorAll('a[href^="/"]')) {
                const href = span.getAttribute('href') || '';
                if (!href.includes('/status/') && span.textContent && span.textContent.trim().startsWith('@')) {
                    replyingTo = span.textContent.trim().replace(/^@/, '');
                    break;
                }
            }
            if (tweetId && handle) {
                out.push({
                    tweet_id: tweetId,
                    handle: handle,
                    display_name: displayName.trim(),
                    text: (text || ''),
                    tweet_url: tweetUrl,
                    datetime: datetime,
                    replying_to: replyingTo,
                    media: extractMedia(article),
                    is_repost: repost.is_repost,
                    reposted_by: repost.reposted_by
                });
            }
        } catch(e) {}
    }
    return out;
}"""
2342
+
2343
+
2344
def scrape_thread_followups(thread_url, scroll_count=3):
    """Open a tweet permalink and collect the articles rendered on the page.

    Exists to catch depth-2+ replies to our own replies, which the
    notifications tab may not surface (X default behavior drops @-tags inside
    active threads).

    Args:
        thread_url: Permalink of the tweet; its /status/<id> is the anchor.
        scroll_count: Number of extract-then-scroll passes.

    Returns:
        {"thread_url": ..., "anchor_tweet_id": ..., "root_author": ...,
         "root_media": [...], "followups": [...], "total": N}
        ``followups`` is every scraped article except the anchor, each in the
        shape produced by THREAD_EXTRACTOR_JS (including a `media` array
        [{url, alt, type}]). ``root_author`` / ``root_media`` come from the
        first article scraped.
    """
    print(f"[twitter_browser] scrape_thread_followups({thread_url!r}, scroll={scroll_count})", file=sys.stderr)
    from playwright.sync_api import sync_playwright

    status_hit = re.search(r"/status/(\d+)", thread_url or "")
    anchor_tweet_id = "" if status_hit is None else status_hit.group(1)

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)
        try:
            page.goto(thread_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3500)

            # Insertion-ordered map: first sighting of each tweet id wins.
            by_id = {}
            for pass_no in range(scroll_count):
                try:
                    batch = page.evaluate(THREAD_EXTRACTOR_JS)
                except Exception as e:
                    print(f"[thread_followups] extractor error on scroll {pass_no}: {e}", file=sys.stderr)
                    batch = []
                for article in batch:
                    key = article.get('tweet_id')
                    if key and key not in by_id:
                        by_id[key] = article
                page.evaluate("window.scrollBy(0, window.innerHeight * 2)")
                page.wait_for_timeout(1200)
                _refresh_browser_lock()

            articles = list(by_id.values())
            followups = [a for a in articles if a.get('tweet_id') != anchor_tweet_id]

            # First article on a permalink page is the conversation root (OP).
            # Already scraped above — capture for free for thread_author_handle.
            if articles:
                root = articles[0]
                root_author = (root.get('handle') or '').lstrip('@')
                root_media = root.get('media') or []
            else:
                root_author = ''
                root_media = []

            return {
                "thread_url": thread_url,
                "anchor_tweet_id": anchor_tweet_id,
                "root_author": root_author,
                "root_media": root_media,
                "followups": followups,
                "total": len(followups),
            }
        finally:
            # Leave a CDP-attached browser running; close only our own.
            if not is_cdp:
                page.close()
                browser.close()
2402
+
2403
+
2404
def scrape_many_thread_followups(thread_urls, scroll_count=3, per_url_delay_ms=2500):
    """Run the thread-followup scrape over a list of URLs in one session.

    Keeps one browser session open (cheaper) and applies a polite delay
    after each URL. A failure on one URL is recorded and does not stop the
    batch.

    Args:
        thread_urls: Iterable of tweet permalinks.
        scroll_count: Extract-then-scroll passes per URL.
        per_url_delay_ms: Pause after each URL, in milliseconds.

    Returns:
        {"results": [...], "urls_visited": N}. Each result carries
        thread_url, anchor_tweet_id, root_author, root_media and followups;
        a failed URL additionally carries "error" and has empty
        root_author / root_media / followups.
    """
    from playwright.sync_api import sync_playwright

    results = []
    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)
        try:
            for url in thread_urls:
                # Resolved outside the try so the error record can carry it too.
                anchor_match = re.search(r"/status/(\d+)", url or "")
                anchor_tweet_id = anchor_match.group(1) if anchor_match else ""
                try:
                    page.goto(url, wait_until="domcontentloaded")
                    page.wait_for_timeout(3500)

                    seen = set()
                    collected = []
                    for i in range(scroll_count):
                        try:
                            new_tweets = page.evaluate(THREAD_EXTRACTOR_JS)
                        except Exception as e:
                            # Still best-effort, but no longer silent: matches
                            # the logging in scrape_thread_followups.
                            print(f"[thread_followups] extractor error on {url} scroll {i}: {e}", file=sys.stderr)
                            new_tweets = []
                        for t in new_tweets:
                            tid = t.get('tweet_id')
                            if tid and tid not in seen:
                                seen.add(tid)
                                collected.append(t)
                        page.evaluate("window.scrollBy(0, window.innerHeight * 2)")
                        page.wait_for_timeout(1200)
                        _refresh_browser_lock()

                    followups = [t for t in collected if t.get('tweet_id') != anchor_tweet_id]
                    # First article on a permalink page is the conversation root (OP).
                    # Already scraped above — capture for free for thread_author_handle.
                    root_author = (collected[0].get('handle') or '').lstrip('@') if collected else ''
                    root_media = (collected[0].get('media') or []) if collected else []
                    print(f"[thread_followups] {url}: {len(followups)} candidate follow-ups", file=sys.stderr)
                    results.append({
                        "thread_url": url,
                        "anchor_tweet_id": anchor_tweet_id,
                        "root_author": root_author,
                        "root_media": root_media,
                        "followups": followups,
                    })
                except Exception as e:
                    print(f"[thread_followups] error on {url}: {e}", file=sys.stderr)
                    # Same keys as a success record (plus "error") so consumers
                    # can index results uniformly.
                    results.append({
                        "thread_url": url,
                        "anchor_tweet_id": anchor_tweet_id,
                        "root_author": "",
                        "root_media": [],
                        "error": str(e),
                        "followups": [],
                    })
                page.wait_for_timeout(per_url_delay_ms)
            return {"results": results, "urls_visited": len(thread_urls)}
        finally:
            # Leave a CDP-attached browser running; close only our own.
            if not is_cdp:
                page.close()
                browser.close()
2462
+
2463
+
2464
+ def _anchor_media_from_tweets(tweets, anchor_tweet_id):
2465
+ """Pick the media of the anchor tweet from a list of scraped articles.
2466
+
2467
+ The anchor is the tweet we plan to reply to (the candidate URL's /status/ID).
2468
+ Match by tweet_id; if the anchor article is not found (X sometimes renders
2469
+ the focused tweet without a resolvable status href in the first paint), fall
2470
+ back to the first article on the page, which on a permalink is the focused
2471
+ tweet. Returns a list [{url, alt, type}] (possibly empty).
2472
+ """
2473
+ if not tweets:
2474
+ return []
2475
+ if anchor_tweet_id:
2476
+ for t in tweets:
2477
+ if t.get("tweet_id") == anchor_tweet_id:
2478
+ return t.get("media") or []
2479
+ return tweets[0].get("media") or []
2480
+
2481
+
2482
+ def _anchor_repost_from_tweets(tweets, anchor_tweet_id):
2483
+ """Pick the repost provenance of the anchor tweet from scraped articles.
2484
+
2485
+ Mirrors _anchor_media_from_tweets: match the anchor by tweet_id, else fall
2486
+ back to the first article (the focused tweet on a permalink). Returns
2487
+ {"is_repost": bool, "reposted_by": str}; defaults to a non-repost.
2488
+ """
2489
+ if not tweets:
2490
+ return {"is_repost": False, "reposted_by": ""}
2491
+ chosen = None
2492
+ if anchor_tweet_id:
2493
+ for t in tweets:
2494
+ if t.get("tweet_id") == anchor_tweet_id:
2495
+ chosen = t
2496
+ break
2497
+ if chosen is None:
2498
+ chosen = tweets[0]
2499
+ return {
2500
+ "is_repost": bool(chosen.get("is_repost", False)),
2501
+ "reposted_by": chosen.get("reposted_by", "") or "",
2502
+ }
2503
+
2504
+
2505
def scrape_thread_media(thread_url, scroll_count=1):
    """Navigate to a tweet's permalink and return the media of the anchor tweet.

    Deterministic, model-free media capture for the MAIN posting cycle: the
    reply-writer needs to "see" the image / video / GIF / link-card on the
    tweet it is about to reply to. Cheap: one navigation, minimal scroll
    (the anchor is at the top of a permalink page).

    Args:
        thread_url: Permalink of the tweet; its /status/<id> is the anchor.
        scroll_count: 1 means extract once with no scrolling; each extra
            count adds one scroll-and-re-extract pass.

    Returns:
        {"thread_url": ..., "anchor_tweet_id": ...,
         "media": [{url, alt, type}, ...],
         "is_repost": bool, "reposted_by": str}
        ``media`` is [] when the tweet has none or extraction failed.
    """
    print(f"[twitter_browser] scrape_thread_media({thread_url!r})", file=sys.stderr)
    from playwright.sync_api import sync_playwright

    anchor_match = re.search(r"/status/(\d+)", thread_url or "")
    anchor_tweet_id = anchor_match.group(1) if anchor_match else ""

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)
        try:
            page.goto(thread_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3500)
            tweets = []
            try:
                tweets = page.evaluate(THREAD_EXTRACTOR_JS)
            except Exception as e:
                print(f"[thread_media] extractor error: {e}", file=sys.stderr)
            # One short scroll can help lazy-loaded media of the focused tweet
            # render; re-extract and prefer the richer result.
            for _ in range(max(0, scroll_count - 1)):
                page.evaluate("window.scrollBy(0, window.innerHeight)")
                page.wait_for_timeout(900)
                try:
                    more = page.evaluate(THREAD_EXTRACTOR_JS)
                    if more and len(more) >= len(tweets):
                        tweets = more
                except Exception as e:
                    # Best-effort pass: keep the earlier result, but leave a
                    # trace instead of failing silently.
                    print(f"[thread_media] re-extract error: {e}", file=sys.stderr)
            _refresh_browser_lock()
            media = _anchor_media_from_tweets(tweets, anchor_tweet_id)
            repost = _anchor_repost_from_tweets(tweets, anchor_tweet_id)
            print(f"[thread_media] {thread_url}: {len(media)} media item(s)"
                  f"{' [repost]' if repost['is_repost'] else ''}", file=sys.stderr)
            return {
                "thread_url": thread_url,
                "anchor_tweet_id": anchor_tweet_id,
                "media": media,
                "is_repost": repost["is_repost"],
                "reposted_by": repost["reposted_by"],
            }
        finally:
            # Leave a CDP-attached browser running; close only our own.
            if not is_cdp:
                page.close()
                browser.close()
2558
+
2559
+
2560
def scrape_many_thread_media(thread_urls, scroll_count=1, per_url_delay_ms=1500):
    """Batch scrape_thread_media over a list of candidate URLs in ONE session.

    Used by the main cycle (run-twitter-cycle.sh Phase 2b-prep) to pre-fetch the
    media of every candidate the model is about to draft against, in a single
    cheap browser pass, then persist each via scripts/log_thread_media.py.
    A failure on one URL is recorded and does not stop the batch.

    Args:
        thread_urls: Iterable of tweet permalinks.
        scroll_count: 1 means extract once with no scrolling; each extra
            count adds one scroll-and-re-extract pass per URL.
        per_url_delay_ms: Pause after each URL, in milliseconds.

    Returns:
        {"results": [{thread_url, anchor_tweet_id, media: [...],
                      is_repost, reposted_by}], "urls_visited": N}
        A failed URL additionally carries "error" with empty media.
    """
    from playwright.sync_api import sync_playwright

    results = []
    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)
        try:
            for url in thread_urls:
                anchor_match = re.search(r"/status/(\d+)", url or "")
                anchor_tweet_id = anchor_match.group(1) if anchor_match else ""
                try:
                    page.goto(url, wait_until="domcontentloaded")
                    page.wait_for_timeout(3000)
                    tweets = []
                    try:
                        tweets = page.evaluate(THREAD_EXTRACTOR_JS)
                    except Exception as e:
                        # Best-effort, but logged like scrape_thread_media does.
                        print(f"[thread_media] extractor error on {url}: {e}", file=sys.stderr)
                        tweets = []
                    for _ in range(max(0, scroll_count - 1)):
                        page.evaluate("window.scrollBy(0, window.innerHeight)")
                        page.wait_for_timeout(800)
                        try:
                            more = page.evaluate(THREAD_EXTRACTOR_JS)
                            # Prefer the later pass only when it is at least as rich.
                            if more and len(more) >= len(tweets):
                                tweets = more
                        except Exception as e:
                            print(f"[thread_media] re-extract error on {url}: {e}", file=sys.stderr)
                    _refresh_browser_lock()
                    media = _anchor_media_from_tweets(tweets, anchor_tweet_id)
                    repost = _anchor_repost_from_tweets(tweets, anchor_tweet_id)
                    print(f"[thread_media] {url}: {len(media)} media item(s)"
                          f"{' [repost]' if repost['is_repost'] else ''}", file=sys.stderr)
                    results.append({
                        "thread_url": url,
                        "anchor_tweet_id": anchor_tweet_id,
                        "media": media,
                        "is_repost": repost["is_repost"],
                        "reposted_by": repost["reposted_by"],
                    })
                except Exception as e:
                    print(f"[thread_media] error on {url}: {e}", file=sys.stderr)
                    results.append({"thread_url": url, "anchor_tweet_id": anchor_tweet_id, "error": str(e), "media": [], "is_repost": False, "reposted_by": ""})
                page.wait_for_timeout(per_url_delay_ms)
            return {"results": results, "urls_visited": len(thread_urls)}
        finally:
            # Leave a CDP-attached browser running; close only our own.
            if not is_cdp:
                page.close()
                browser.close()
2616
+
2617
+
2618
def _fail(message):
    """Print ``message`` to stderr and exit the process with status 1."""
    print(message, file=sys.stderr)
    sys.exit(1)


def _require_argc(min_len, usage):
    """Exit with ``usage`` on stderr unless ``sys.argv`` has at least ``min_len`` items."""
    if len(sys.argv) < min_len:
        _fail(usage)


def _int_arg(index, default, label, *, blank_is_default=False):
    """Return ``sys.argv[index]`` parsed as int, or ``default`` when it is absent.

    ``label`` is the "<command>: <arg_name>" prefix used in the error message.
    With ``blank_is_default`` a whitespace-only argument is treated as absent
    instead of being rejected. A non-integer value exits with status 1.
    """
    if len(sys.argv) <= index:
        return default
    raw = sys.argv[index]
    if blank_is_default and not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        _fail(f"{label} must be int, got {raw!r}")


def _load_urls_or_exit(urls_path):
    """Read one URL per line from ``urls_path``, skipping blank lines.

    When the file yields no URLs, print an empty result payload and exit with
    status 0 so no scraping function is invoked for an empty batch.
    """
    # Explicit UTF-8 so decoding does not depend on the process locale.
    with open(urls_path, encoding="utf-8") as f:
        urls = [line.strip() for line in f if line.strip()]
    if not urls:
        _emit({"results": [], "urls_visited": 0})
        sys.exit(0)
    return urls


def _emit(result):
    """Print ``result`` to stdout as indented JSON."""
    print(json.dumps(result, indent=2))


def main():
    """Command-line entry point: dispatch ``sys.argv[1]`` to a browser action.

    Supported commands: ``reply``, ``like``, ``self-reply``, ``unread-dms``,
    ``read-conversation``, ``send-dm``, ``notifications``,
    ``thread-followups``, ``thread-media`` and ``thread-media-batch``.

    Each command prints its result to stdout as indented JSON. Missing or
    malformed arguments print a usage/error line to stderr and exit with
    status 1; a missing or unknown command also prints the module docstring.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)

    cmd = sys.argv[1]

    if cmd == "reply":
        _require_argc(4, "Usage: twitter_browser.py reply <tweet_url> <reply_text>")
        # S4L_SKIP_CAMPAIGN_SUFFIX=1 opts this reply out of active-campaign
        # suffixes (e.g. " written with ai"). Set ONLY by the MCP draft_cycle
        # post path (mcp/src/index.ts::postApproved) so manual/reviewed posts
        # land clean; the cron pipeline never sets it, so the A/B experiment
        # keeps running there and on Reddit. Reuses the existing apply_campaigns
        # plumbing (same flag the self-reply path uses below).
        skip_flag = os.environ.get("S4L_SKIP_CAMPAIGN_SUFFIX", "").strip().lower()
        skip_campaign = skip_flag in ("1", "true", "yes")
        _emit(reply_to_tweet(sys.argv[2], sys.argv[3], apply_campaigns=not skip_campaign))

    elif cmd == "like":
        _require_argc(3, "Usage: twitter_browser.py like <tweet_url>")
        _emit(like_tweet(sys.argv[2]))

    elif cmd == "self-reply":
        # Self-reply with guaranteed project URL. The URL is passed as a
        # separate arg and appended at the tool level so the LLM cannot
        # strip it from the text (which happened repeatedly when relying
        # on prompt instructions alone).
        _require_argc(
            5,
            "Usage: twitter_browser.py self-reply <our_reply_url> <text> <project_url>",
        )
        our_url, text, project_url = sys.argv[2:5]
        # Require a real scheme prefix: a bare "http" prefix check would also
        # accept non-URLs such as "httpfoo", contradicting the error message.
        if not project_url.startswith(("http://", "https://")):
            _fail(f"self-reply: project_url must start with http(s), got: {project_url!r}")
        stripped = text.rstrip()
        # Append the URL only when the text does not already contain it.
        final = stripped if project_url in stripped else f"{stripped} {project_url}"
        # Self-reply opts out of the campaign suffix: this turn is the
        # project-URL follow-up, not the primary post that gets tagged.
        result = reply_to_tweet(our_url, final, apply_campaigns=False)
        result["final_text"] = final
        _emit(result)

    elif cmd == "unread-dms":
        _emit(unread_dms())

    elif cmd == "read-conversation":
        _require_argc(3, "Usage: twitter_browser.py read-conversation <thread_url>")
        _emit(read_conversation(sys.argv[2]))

    elif cmd == "send-dm":
        _require_argc(4, "Usage: twitter_browser.py send-dm <thread_url> <message> [dm_id]")
        # dm_id is optional; an empty/whitespace value means "not provided".
        dm_id_arg = _int_arg(4, None, "send-dm: dm_id", blank_is_default=True)
        _emit(send_dm(sys.argv[2], sys.argv[3], dm_id=dm_id_arg))

    elif cmd == "notifications":
        scroll_count = _int_arg(2, 8, "notifications: scroll_count")
        tab = sys.argv[3] if len(sys.argv) >= 4 else "all"
        _emit(discover_notifications(scroll_count=scroll_count, tab=tab))

    elif cmd == "thread-followups":
        _require_argc(
            3,
            "Usage: twitter_browser.py thread-followups <urls_file.txt>\n"
            " urls_file.txt: one tweet permalink per line (our reply URLs)",
        )
        urls_path = sys.argv[2]
        scroll_count = _int_arg(3, 3, "thread-followups: scroll_count")
        urls = _load_urls_or_exit(urls_path)
        _emit(scrape_many_thread_followups(urls, scroll_count=scroll_count))

    elif cmd == "thread-media":
        # Single-URL anchor media fetch (deterministic, model-free).
        # Usage: twitter_browser.py thread-media <tweet_url> [scroll_count]
        _require_argc(
            3,
            "Usage: twitter_browser.py thread-media <tweet_url> [scroll_count]\n"
            " Returns {thread_url, anchor_tweet_id, media:[{url,alt,type}]}",
        )
        scroll_count = _int_arg(3, 1, "thread-media: scroll_count")
        _emit(scrape_thread_media(sys.argv[2], scroll_count=scroll_count))

    elif cmd == "thread-media-batch":
        # Batch anchor media fetch over a file of candidate URLs in ONE session.
        # Usage: twitter_browser.py thread-media-batch <urls_file.txt> [scroll_count]
        _require_argc(
            3,
            "Usage: twitter_browser.py thread-media-batch <urls_file.txt> [scroll_count]\n"
            " urls_file.txt: one candidate tweet permalink per line\n"
            " Returns {results:[{thread_url, anchor_tweet_id, media:[...]}], urls_visited}",
        )
        urls_path = sys.argv[2]
        scroll_count = _int_arg(3, 1, "thread-media-batch: scroll_count")
        urls = _load_urls_or_exit(urls_path)
        _emit(scrape_many_thread_media(urls, scroll_count=scroll_count))

    else:
        print(f"Unknown command: {cmd}", file=sys.stderr)
        print(__doc__)
        sys.exit(1)
2801
+
2802
+
2803
# Script entry point: run the CLI dispatcher only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()