@m13v/s4l 1.6.197-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1336 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +513 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,2702 @@
1
+ #!/usr/bin/env python3
2
+ """Fetch engagement stats for Reddit + Moltbook posts via public APIs.
3
+
4
+ Updates upvotes, comments_count, and status in the DB. No browser needed.
5
+ Reddit profile scrape (Step 1 of stats.sh) covers most stats; this script
6
+ acts as deletion/removal detection and as a fallback for rows the scrape
7
+ couldn't match.
8
+
9
+ Usage:
10
+ python3 scripts/stats.py [--db PATH] [--quiet]
11
+ """
12
+
13
+ import argparse
14
+ import json
15
+ import os
16
+ import re
17
+ import sys
18
+ import time
19
+ import urllib.error
20
+ import urllib.request
21
+ from datetime import datetime, timedelta, timezone
22
+
23
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
24
+ from http_api import api_get, api_post, api_patch, load_env
25
+
26
+
27
+ # --- HTTP wrappers for the Reddit branch (2026-05-12 migration) --------------
28
+ # The Reddit pipeline must have zero direct-SQL paths. These helpers wrap the
29
+ # small set of /api/v1/posts and /api/v1/replies operations the Reddit branch
30
+ # needs, so the original logic in refresh_reddit / refresh_reddit_replies /
31
+ # refresh_reddit_resurrect can stay readable while still routing every
32
+ # read/write through HTTP. Other platforms (twitter, github, moltbook) still
33
+ # use direct SQL until they migrate; the helpers below are intentionally
34
+ # named *_http to make the boundary obvious.
35
+
36
def _http_list_reddit_active_posts():
    """Return active Reddit posts (with our_url) for the Reddit refresh job.

    Issues a single GET /api/v1/posts request sorted by id ASC with the
    maximum page size of 500. No id cursor is sent to the server, so
    re-issuing the request would return the same first page; this function
    therefore fetches exactly one page instead of looping. The typical Reddit
    active-post count is well under 500, so one page covers it.

    Rows without a truthy ``id`` are dropped, and a row whose ``id`` was
    already seen in the page is skipped, so each post appears at most once.

    Returns:
        list of post dicts as returned by the API (possibly empty).
    """
    query = {
        "platform": "reddit",
        "status": "active",
        "has_our_url": "true",
        "order_by": "id",
        "order_dir": "asc",
        "limit": 500,
    }
    resp = api_get("/api/v1/posts", query=query)
    rows = ((resp or {}).get("data") or {}).get("posts") or []

    out = []
    seen_ids = set()
    for row in rows:
        row_id = row.get("id")
        # Skip malformed rows (no id) and repeats of an id already collected.
        if not row_id or row_id in seen_ids:
            continue
        seen_ids.add(row_id)
        out.append(row)
    return out
72
+
73
+
74
def _http_list_reddit_dead_posts(days):
    """Return Reddit posts marked deleted/removed within the last ``days`` days.

    Used by the resurrect job. The cutoff is computed in UTC and sent to
    /api/v1/posts as an ISO-8601 ``since`` value; a single page of up to 500
    rows (id ASC) is requested.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=int(days))
    params = {
        "platform": "reddit",
        "statuses": "deleted,removed",
        "has_our_url": "true",
        "since": cutoff.isoformat(),
        "order_by": "id",
        "order_dir": "asc",
        "limit": 500,
    }
    resp = api_get("/api/v1/posts", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("posts") or []
90
+
91
+
92
def _http_patch_post(post_id, body):
    """PATCH /api/v1/posts/<post_id> with ``body``; returns api_patch's result."""
    endpoint = f"/api/v1/posts/{int(post_id)}"
    return api_patch(endpoint, body)
94
+
95
+
96
def _http_detect_deletion(post_id, kind, threshold=2):
    """Report one deletion detection for a post via the detect-deletion route.

    Posts ``kind`` and ``threshold`` to /api/v1/posts/<id>/detect-deletion,
    which bumps deletion_detect_count and flips status once the threshold is
    met.

    Returns:
        (detect_count, status_set): the count from the response (0 when
        absent) and whether the response reports that status was set.
    """
    endpoint = f"/api/v1/posts/{int(post_id)}/detect-deletion"
    payload = {"kind": kind, "threshold": int(threshold)}
    resp = api_post(endpoint, payload)
    data = (resp or {}).get("data") or {}
    detect_count = int(data.get("detect_count") or 0)
    status_set = bool(data.get("status_set"))
    return detect_count, status_set
104
+
105
+
106
def _http_list_reddit_replies_to_refresh():
    """Return replies for our Reddit comments that need a stats refresh.

    Requests rows with status='replied' and a non-null our_reply_id from
    /api/v1/replies (one page, up to 500 rows, ordered by id). Rows without a
    truthy ``id`` are dropped and only the first row per ``id`` is kept,
    preserving the server's order.
    """
    params = {
        "platform": "reddit",
        "status": "replied",
        "has_our_reply_id": "true",
        "order_by": "id",
        "limit": 500,
    }
    resp = api_get("/api/v1/replies", query=params)
    envelope = resp or {}
    rows = (envelope.get("data") or {}).get("replies") or []

    # dict preserves insertion order; setdefault keeps the first row per id.
    first_by_id = {}
    for row in rows:
        reply_id = row.get("id")
        if reply_id:
            first_by_id.setdefault(reply_id, row)
    return list(first_by_id.values())
127
+
128
+
129
def _http_patch_reply(reply_id, body):
    """PATCH /api/v1/replies/<reply_id> with ``body``; returns api_patch's result."""
    endpoint = f"/api/v1/replies/{int(reply_id)}"
    return api_patch(endpoint, body)
131
+
132
+
133
+ # --- HTTP wrappers for the Twitter branch (2026-05-19 migration) -------------
134
+ # Mirror the Reddit pattern: every read + write in refresh_twitter() and
135
+ # refresh_twitter_replies() goes through HTTP so the VM (no DATABASE_URL) can
136
+ # run the stats job too. Scoping by `our_account` happens server-side in the
137
+ # /api/v1/posts/active-for-stats endpoint; the local mac passes 'm13v_', the
138
+ # VM passes 'matt_diak'. Strict scoping means neither machine touches the
139
+ # other's posts even when both cron-fire concurrently.
140
+
141
def _http_list_twitter_active_posts(our_account, audit_mode=False, stale_hours=5):
    """Return the posts the Twitter stats job should refresh for one handle.

    Args:
        our_account: handle sent as ``our_account`` to scope the result.
        audit_mode: sent as the string "true"/"false" in the ``audit`` param.
        stale_hours: sent as ``engagement_stale_after_hours`` (cast to int).

    Returns:
        list of post dicts from /api/v1/posts/active-for-stats (possibly empty).
    """
    audit_flag = "true" if audit_mode else "false"
    params = {
        "platform": "twitter",
        "our_account": our_account,
        "audit": audit_flag,
        "engagement_stale_after_hours": int(stale_hours),
    }
    resp = api_get("/api/v1/posts/active-for-stats", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("posts") or []
153
+
154
+
155
def _http_list_twitter_replies_to_refresh():
    """Return reply rows the Twitter stats job should refresh.

    Scoping by install_id happens server-side via the auth header (the route
    reads resolveAuth().install_id and filters). Note the platform value sent
    here is "x", not "twitter".
    """
    params = {"platform": "x"}
    resp = api_get("/api/v1/replies/active-for-stats", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("replies") or []
163
+
164
+
165
def _http_list_twitter_top_replies_to_refresh(stale_hours=5):
    """Return thread_top_replies rows the Twitter stats job should refresh.

    The result is scoped to the calling install via the X-Installation header
    (the route reads resolveAuth().install_id; the primary historical install
    also claims rows with a NULL install_id). The freshness gate defaults to
    5 hours, the same as for posts, so the snapshot and benchmark curves stay
    aligned per cycle.
    """
    params = {
        "platform": "twitter",
        "engagement_stale_after_hours": int(stale_hours),
    }
    resp = api_get("/api/v1/thread-top-replies/active-for-stats", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("thread_top_replies") or []
179
+
180
+
181
def _http_patch_top_reply(ttr_id, body):
    """PATCH /api/v1/thread-top-replies/<ttr_id> with ``body``."""
    endpoint = f"/api/v1/thread-top-replies/{int(ttr_id)}"
    return api_patch(endpoint, body)
183
+
184
+
185
def _http_detect_deletion_top_reply(ttr_id, kind, threshold=2):
    """Report one deletion detection for a thread_top_replies row.

    Returns:
        (detect_count, status_set) read from the response's ``data`` object;
        detect_count is 0 when the field is missing or falsy.
    """
    endpoint = f"/api/v1/thread-top-replies/{int(ttr_id)}/detect-deletion"
    payload = {"kind": kind, "threshold": int(threshold)}
    resp = api_post(endpoint, payload)
    data = (resp or {}).get("data") or {}
    detect_count = int(data.get("detect_count") or 0)
    status_set = bool(data.get("status_set"))
    return detect_count, status_set
192
+
193
+
194
def _http_snapshot_post_views(post_id, views):
    """Best-effort daily views snapshot for one post.

    HTTP equivalent of dbmod.snapshot_post_views: asks the server to UPSERT
    one post_views_daily row for CURRENT_DATE. Always returns None.
    """
    try:
        payload = {"post_id": int(post_id), "views": int(views)}
        api_post("/api/v1/post-views-daily/snapshot", payload)
    except Exception:
        # Deliberately swallowed: the parent row's views/upvotes are already
        # updated, and a transient network blip on this rollup must not
        # abort the stats run.
        return
206
+
207
+
208
+ # --- HTTP wrappers for the parent-thread snapshot lane (2026-05-26) ----------
209
+ # refresh_twitter_threads() polls the *parent* tweet of every active comment
210
+ # we made and appends one row to thread_snapshots per poll. Two helpers:
211
+ # - list: returns deduped parent threads to poll right now, scoped by
212
+ # our_account and gated by staleness (skip threads polled within the
213
+ # window).
214
+ # - insert: appends one snapshot row, attributable to the caller's
215
+ # install_id via the auth header.
216
+
217
def _http_list_twitter_parent_threads(our_account, stale_hours=5,
                                      max_age_days=30):
    """Return the parent threads the Twitter stats job should poll now.

    Each returned dict carries post_id, thread_url, thread_author_handle,
    posted_at, last_captured_at (NULL if never polled) and the previous
    snapshot's counters, so the writer can short-circuit when nothing changed.

    Args:
        our_account: handle used to scope the threads.
        stale_hours: sent as ``stale_hours`` (cast to int).
        max_age_days: sent as ``max_age_days`` (cast to int).
    """
    params = {
        "platform": "twitter",
        "our_account": our_account,
        "stale_hours": int(stale_hours),
        "max_age_days": int(max_age_days),
    }
    resp = api_get("/api/v1/thread-snapshots/active-for-stats", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("threads") or []
235
+
236
+
237
def _http_insert_thread_snapshot(platform, thread_url, *,
                                 thread_external_id=None,
                                 thread_author_handle=None,
                                 views=None, likes=None, replies=None,
                                 retweets=None, bookmarks=None, quotes=None,
                                 is_deleted=False, error=None):
    """Append one thread snapshot row via POST /api/v1/thread-snapshots.

    All counters are forwarded as given (None allowed); ``is_deleted`` is
    coerced to bool.

    Returns:
        The inserted row id from the response, or None when the request
        raised or the response carried no id. Exceptions are swallowed so a
        single bad row doesn't abort the whole refresh pass; the caller logs
        and continues.
    """
    payload = dict(
        platform=platform,
        thread_url=thread_url,
        thread_external_id=thread_external_id,
        thread_author_handle=thread_author_handle,
        views=views,
        likes=likes,
        replies=replies,
        retweets=retweets,
        bookmarks=bookmarks,
        quotes=quotes,
        is_deleted=bool(is_deleted),
        error=error,
    )
    try:
        resp = api_post("/api/v1/thread-snapshots", payload)
        envelope = resp or {}
        return (envelope.get("data") or {}).get("id")
    except Exception:
        return None
267
+
268
+
269
def _http_list_moltbook_active_posts():
    """Return active moltbook posts (with our_url) to refresh.

    The generic /api/v1/posts list can't order by engagement_updated_at, so
    rows are requested id-descending. Moltbook's active volume is small, so a
    single 500-row page covers it.
    """
    params = {
        "platform": "moltbook",
        "status": "active",
        "has_our_url": "true",
        "order_by": "id",
        "order_dir": "desc",
        "limit": 500,
    }
    resp = api_get("/api/v1/posts", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("posts") or []
285
+
286
+
287
def _http_list_github_active_posts(limit=None):
    """Return all active github comments that have our_url.

    Each row includes a folded-in reply_count, so the caller skips a per-post
    COUNT round trip. The server-side query has no posted_at window or
    account scoping, matching refresh_github's plain SELECT.

    Args:
        limit: optional client-side cap on the number of rows returned
            (smoke tests only); falsy means no cap.
    """
    params = {"platform": "github"}
    resp = api_get("/api/v1/posts/active-for-stats", query=params)
    envelope = resp or {}
    rows = (envelope.get("data") or {}).get("posts") or []
    return rows[: int(limit)] if limit else rows
297
+
298
+
299
def _http_list_github_replies_to_refresh():
    """Return replies for our github comments that need a stats refresh.

    Covers rows with status='replied' and a non-null our_reply_url. Reuses
    the install-scoped replies/active-for-stats endpoint, which returns id,
    our_reply_url and engagement_updated_at with no 500-row cap.
    """
    params = {"platform": "github"}
    resp = api_get("/api/v1/replies/active-for-stats", query=params)
    envelope = resp or {}
    data = envelope.get("data") or {}
    return data.get("replies") or []
305
+
306
+
307
def _http_mark_minimized(post_id, reason):
    """Mark a hidden (isMinimized) github comment as status='deleted'.

    The server applies the GREATEST/source_summary-append semantics that
    strike_alert expects. ``reason`` is sent as a string; a falsy reason
    becomes the empty string. Returns api_post's result.
    """
    endpoint = f"/api/v1/posts/{int(post_id)}/mark-minimized"
    payload = {"reason": str(reason or "")}
    return api_post(endpoint, payload)
314
+
315
+
316
+ def _parse_dt(v):
317
+ """Tolerate both datetime objects (legacy) and ISO strings (HTTP)."""
318
+ if not v:
319
+ return None
320
+ if hasattr(v, "isoformat"):
321
+ return v
322
+ try:
323
+ return datetime.fromisoformat(str(v).replace("Z", "+00:00"))
324
+ except ValueError:
325
+ return None
326
+
327
+
328
+ import progress
329
+ from moltbook_tools import (
330
+ fetch_moltbook_json,
331
+ HttpNotFoundError as MoltbookNotFoundError,
332
+ MoltbookRateLimitedError,
333
+ )
334
+
335
+ CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")
336
+
337
+
338
def load_config():
    """Load the JSON config at CONFIG_PATH.

    Returns:
        The parsed JSON content, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    if not os.path.exists(CONFIG_PATH):
        return {}
    # JSON is UTF-8 by specification; pin the encoding so decoding does not
    # depend on the process locale.
    with open(CONFIG_PATH, encoding="utf-8") as f:
        return json.load(f)
343
+
344
+
345
class HttpNotFoundError(Exception):
    """Signals that a fetch came back with HTTP 404.

    The parsed JSON body (when there is one) is kept on ``.body``. fxtwitter
    serves its *tombstone* objects (type="tombstone", reason="unavailable" --
    a guest-API blind spot; the tweet is ALIVE to a logged-in viewer) WITH an
    HTTP 404 status, so dropping the body is what produced false deletion
    strikes. Keeping it lets refresh_twitter's tombstone guard inspect it.
    """

    def __init__(self, url, body=None):
        # The URL becomes the exception message (args[0]).
        super().__init__(url)
        # Parsed 404 payload, or None when absent or unparseable.
        self.body = body
358
+
359
+
360
def fetch_json(url, headers=None, user_agent="social-autoposter/1.0"):
    """GET ``url`` and return the parsed JSON body.

    Args:
        url: address to fetch.
        headers: optional extra request headers, merged over the User-Agent.
        user_agent: User-Agent header value.

    Returns:
        The decoded JSON, or None on any non-404 HTTP error, network error,
        timeout (15s) or undecodable body.

    Raises:
        HttpNotFoundError: on HTTP 404, carrying the parsed 404 body (or
            None if it could not be parsed) on ``.body``.
    """
    request_headers = {"User-Agent": user_agent}
    if headers:
        request_headers.update(headers)
    request = urllib.request.Request(url, headers=request_headers)
    try:
        with urllib.request.urlopen(request, timeout=15) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as err:
        if err.code != 404:
            return None
        # NOTE: never throw away the body on the status code that carries
        # the payload. fxtwitter returns its meaningful tombstone object
        # WITH a 404; reading it here is what lets the tombstone guard
        # distinguish "alive but guest-blind" from a real deletion.
        try:
            not_found_body = json.loads(err.read())
        except Exception:
            not_found_body = None
        raise HttpNotFoundError(url, body=not_found_body)
    except Exception:
        return None
385
+
386
+
387
+ _reddit_rate_state = {"remaining": None, "reset_in": None}
388
+
389
+
390
+ def _parse_float_header(v):
391
+ if v is None:
392
+ return None
393
+ try:
394
+ return float(v)
395
+ except (TypeError, ValueError):
396
+ return None
397
+
398
+
399
def _update_reddit_rate_state(headers):
    """Copy x-ratelimit-* header values into _reddit_rate_state for pacing.

    A header that is missing or not parseable as a float leaves the
    corresponding state entry untouched. Falsy ``headers`` is a no-op.
    """
    if not headers:
        return
    parsed = {
        "remaining": _parse_float_header(headers.get("x-ratelimit-remaining")),
        "reset_in": _parse_float_header(headers.get("x-ratelimit-reset")),
    }
    _reddit_rate_state.update(
        {key: value for key, value in parsed.items() if value is not None}
    )
409
+
410
+
411
def _reddit_pacing_sleep():
    """Sleep between Reddit calls according to the remaining rate budget.

    Reddit's public endpoint allows ~100 calls per 10-minute sliding window.
    Delay selection:
      - no rate headers seen yet: flat 2s;
      - budget exhausted (remaining <= 0): wait for the reset, clamped to
        1..120s;
      - otherwise: spread the remaining calls across the reset window,
        clamped to 1..30s per call.
    """
    remaining = _reddit_rate_state.get("remaining")
    window = _reddit_rate_state.get("reset_in")
    if remaining is None or window is None:
        delay = 2
    elif remaining <= 0:
        delay = min(max(1, window), 120)
    else:
        delay = max(1, min(window / remaining, 30))
    time.sleep(delay)
428
+
429
+
430
def fetch_reddit_json(url, user_agent, max_retries=2, timeout=15):
    """Rate-limit aware Reddit JSON fetch.

    Returns a 2-tuple (status, data). status is one of:
        'ok'           - parsed JSON returned as data
        'not_found'    - HTTP 404 (data=None)
        'rate_limited' - HTTP 429 even after retries (data=None)
        'empty'        - HTTP 200 but empty/malformed body (data=None)
        'error'        - network, timeout, or other HTTPError (data=None)

    Reads x-ratelimit-remaining / x-ratelimit-reset from every response
    (success AND error) into _reddit_rate_state so the caller can pace.
    On 429, honors Retry-After (capped to 120s) and retries.

    Args:
        url: Reddit ``.json`` URL to request.
        user_agent: Value sent as the ``User-Agent`` header.
        max_retries: Extra attempts allowed after the first one, for 429s
            and for non-HTTP failures.
        timeout: Per-request timeout in seconds.
    """
    req = urllib.request.Request(url, headers={"User-Agent": user_agent})
    for attempt in range(max_retries + 1):
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:
                _update_reddit_rate_state(resp.headers)
                body = resp.read()
                if not body:
                    return ("empty", None)
                try:
                    return ("ok", json.loads(body))
                except Exception:
                    return ("empty", None)
        except urllib.error.HTTPError as e:
            _update_reddit_rate_state(e.headers)
            if e.code == 404:
                return ("not_found", None)
            if e.code != 429:
                return ("error", None)

            retry_after = None
            raw_retry_after = e.headers.get("Retry-After") if e.headers else None
            if raw_retry_after:
                try:
                    retry_after = int(raw_retry_after)
                except (TypeError, ValueError):
                    retry_after = None
            if retry_after is None:
                # Fall back to the last seen reset window. int() raises on a
                # non-finite reading; an exception raised here would escape
                # the function (sibling except clauses do not apply inside a
                # handler), so fall back to 60s instead.
                try:
                    retry_after = int(_reddit_rate_state.get("reset_in") or 60)
                except (TypeError, ValueError, OverflowError):
                    retry_after = 60
            retry_after = max(1, min(retry_after, 120))
            if attempt < max_retries:
                time.sleep(retry_after)
                continue
            return ("rate_limited", None)
        except Exception:
            # Network/timeout failure: linear back-off, then give up.
            if attempt < max_retries:
                time.sleep(5 * (attempt + 1))
                continue
            return ("error", None)
    return ("error", None)
483
+
484
+
485
def refresh_reddit(db, user_agent, config=None, quiet=False):
    """Poll Reddit for each active post row and write refreshed stats back.

    Rows are read with ``_http_list_reddit_active_posts()``. Each row that is
    neither fresh nor stable is fetched through ``fetch_reddit_json`` and then
    updated via ``_http_patch_post`` or struck via ``_http_detect_deletion``.

    Args:
        db: Unused; kept so existing callers need not change.
        user_agent: User-Agent string handed to ``fetch_reddit_json``.
        config: Optional settings mapping. ``accounts.reddit.username`` is
            used to recognise our own threads and comments.
        quiet: When true, suppress all status prints.

    Returns:
        dict with the counters ``total``, ``updated``, ``changed``,
        ``deleted``, ``removed``, ``errors``, ``errors_404``,
        ``errors_rate_limited``, ``errors_empty``, ``errors_other``,
        ``skipped``, ``skipped_fresh`` and the per-row ``results`` list.
    """
    config = config or {}
    # A blank username must never match: author-less children (and threads
    # with no author field) would otherwise compare equal to "".
    our_username = (
        ((config.get("accounts") or {}).get("reddit") or {}).get("username") or ""
    ).lower()

    # 2026-05-12: read all rows via /api/v1/posts so the Reddit branch owns no
    # SQL. `db` is preserved in the signature for backwards compatibility with
    # callers in main(); it's ignored here.
    posts_rows = _http_list_reddit_active_posts()

    def _parse_iso(v):
        """Parse an ISO-8601 string (trailing 'Z' allowed); None on failure."""
        if not v:
            return None
        try:
            return datetime.fromisoformat(str(v).replace("Z", "+00:00"))
        except Exception:
            return None

    def _listing_children(listing):
        """Return the ``data.children`` list of a listing dict, else []."""
        if not isinstance(listing, dict):
            return []
        return (listing.get("data") or {}).get("children") or []

    # Build a list-of-tuples shape that the for-loop below expects:
    # (id, our_url, thread_url, upvotes, comments_count, scan_no_change_count,
    #  posted_at-as-datetime, engagement_updated_at-as-datetime)
    posts = [
        (
            r.get("id"),
            r.get("our_url"),
            r.get("thread_url"),
            r.get("upvotes"),
            r.get("comments_count"),
            int(r.get("scan_no_change_count") or 0),
            _parse_iso(r.get("posted_at")),
            _parse_iso(r.get("engagement_updated_at")),
        )
        for r in posts_rows
    ]

    BATCH_SIZE = 200
    total = updated = changed = deleted = removed = errors = skipped = 0
    # `updated`: rows the Reddit JSON API answered for and we wrote back
    #   (every successful poll). Effectively `total - errors - skipped - frozen`.
    # `changed`: subset of `updated` where score OR comments_count actually
    #   shifted since the prior scan. The dashboard's "updated" pill renders
    #   this (see log_run.py --updated docstring) — before 2026-05-08 it
    #   showed the polled count, which trivially matched "checked" whenever
    #   errors were zero and hid that ~90% of Reddit polls observe no change.
    skipped_fresh = 0
    errors_404 = errors_rate_limited = errors_empty = errors_other = 0
    results = []

    # If Step 1 (profile scrape) just ran, the row was already refreshed and
    # has a recent engagement_updated_at. Skip to save API calls. Applies to
    # both thread and comment rows since the scrape now captures comment-row
    # scores too. Deletion detection is delayed by up to FRESH_WINDOW for
    # those rows, which is acceptable (next cycle catches it).
    FRESH_WINDOW = timedelta(hours=4)
    now_utc = datetime.now(timezone.utc)

    for post in posts:
        total += 1
        if total % BATCH_SIZE == 0:
            progress.tick("reddit", total, len(posts),
                          updated=updated, changed=changed, errors=errors,
                          errors_404=errors_404,
                          errors_rate_limited=errors_rate_limited,
                          errors_empty=errors_empty,
                          errors_other=errors_other)
            if not quiet:
                rem = _reddit_rate_state.get("remaining")
                rem_str = f", rem={int(rem)}" if rem is not None else ""
                print(f" Batch ({total}/{len(posts)} iterated, {updated} polled, {changed} changed, {errors} errors [404={errors_404} rl={errors_rate_limited} empty={errors_empty} other={errors_other}]{rem_str})", flush=True)
        post_id, our_url = post[0], post[1]
        prev_upvotes, prev_comments = post[3], post[4]
        no_change = post[5]
        posted_at = post[6]
        engagement_updated_at = post[7]

        # Skip any row (thread or comment) refreshed by Step 1 within the
        # fresh window. Step 1 captures views + upvotes + comments_count for
        # both row types, so all stats are covered without an API hit.
        if engagement_updated_at:
            eu = engagement_updated_at
            if eu.tzinfo is None:
                eu = eu.replace(tzinfo=timezone.utc)
            if now_utc - eu < FRESH_WINDOW:
                skipped_fresh += 1
                continue

        # Skip stable posts: 2+ scans with no change AND older than 3 days
        if no_change >= 2 and posted_at:
            age = datetime.now(timezone.utc) - (posted_at.replace(tzinfo=timezone.utc) if posted_at.tzinfo is None else posted_at)
            if age > timedelta(days=3):
                skipped += 1
                continue

        if not our_url or not our_url.startswith("http"):
            errors += 1
            errors_other += 1
            continue

        # Detect if our_url points to a specific comment or just the thread
        has_comment_id = bool(
            re.search(r"/comment/[a-z0-9]+", our_url) or
            re.search(r"/comments/[a-z0-9]+/[^/]+/[a-z0-9]+", our_url)
        )

        json_url = re.sub(r"www\.reddit\.com", "old.reddit.com", our_url).rstrip("/") + ".json"

        _reddit_pacing_sleep()
        status, response = fetch_reddit_json(json_url, user_agent)
        if status == "not_found":
            errors += 1
            errors_404 += 1
            continue
        if status == "rate_limited":
            errors += 1
            errors_rate_limited += 1
            continue
        if status == "empty":
            errors += 1
            errors_empty += 1
            continue
        # Any other non-ok status is checked BEFORE the payload-shape test:
        # it arrives with data=None and would otherwise be miscounted as an
        # empty body.
        if status != "ok":
            errors += 1
            errors_other += 1
            continue
        if not isinstance(response, list) or len(response) < 2:
            errors += 1
            errors_empty += 1
            continue

        # An empty children list must not raise; fall back to an empty thread.
        thread_children = _listing_children(response[0])
        first_child = thread_children[0] if thread_children else {}
        thread_data = (first_child.get("data") or {}) if isinstance(first_child, dict) else {}
        thread_score = thread_data.get("score", 0)
        thread_comments = thread_data.get("num_comments", 0)
        thread_title = (thread_data.get("title") or "")[:60]
        thread_author = thread_data.get("author") or ""

        if has_comment_id:
            # our_url has a comment permalink — response[1] contains the specific comment
            children = _listing_children(response[1])
            comment_data = (
                children[0].get("data")
                if children and isinstance(children[0], dict)
                else None
            )
            if not comment_data:
                # Answered, but without our comment's payload: bucket it with
                # the malformed/empty responses so the breakdown adds up.
                errors += 1
                errors_empty += 1
                continue

            body = comment_data.get("body", "")
            author = comment_data.get("author", "")
            score = comment_data.get("score", 0)

            # Count direct replies to our comment
            replies_obj = comment_data.get("replies", "")
            comment_reply_count = 0
            if replies_obj and isinstance(replies_obj, dict):
                reply_children = _listing_children(replies_obj)
                comment_reply_count = sum(1 for c in reply_children if c.get("kind") == "t1")
                comment_reply_count += sum(
                    c.get("data", {}).get("count", 0)
                    for c in reply_children if c.get("kind") == "more"
                )

            if body in ("[deleted]",) or author == "[deleted]":
                # Two-strike deletion detection. The /detect-deletion endpoint
                # atomically bumps deletion_detect_count and flips status when
                # the threshold is reached.
                detect_count, was_set = _http_detect_deletion(post_id, "deleted", 2)
                if was_set:
                    deleted += 1
                    if not quiet:
                        print(f"DELETED [{post_id}] (confirmed after {detect_count} detections)")
                else:
                    if not quiet:
                        print(f"DELETION PENDING [{post_id}] (detection {detect_count}/2)")
                continue

            if body == "[removed]":
                detect_count, was_set = _http_detect_deletion(post_id, "removed", 2)
                if was_set:
                    removed += 1
                    if not quiet:
                        print(f"REMOVED [{post_id}] (confirmed after {detect_count} detections)")
                else:
                    if not quiet:
                        print(f"REMOVAL PENDING [{post_id}] (detection {detect_count}/2)")
                continue

            _http_patch_post(post_id, {
                "upvotes": score,
                "comments_count": comment_reply_count,
                "stamp_engagement_now": True,
                "stamp_status_checked_now": True,
                "reset_deletion_detect_count": True,
            })
            updated += 1
            if score != prev_upvotes or comment_reply_count != prev_comments:
                changed += 1
            results.append({"id": post_id, "score": score, "comment_replies": comment_reply_count,
                            "thread_score": thread_score, "thread_comments": thread_comments,
                            "title": thread_title,
                            # _comments_written = the value we wrote to
                            # posts.comments_count (used by the skip-optimization
                            # block below to gate scan_no_change_count on
                            # comment-count change as well as score change).
                            "_comments_written": comment_reply_count})
        else:
            # our_url is a thread URL without a comment ID
            # Check if it's our original post (we are the thread author)
            is_our_post = bool(our_username) and thread_author.lower() == our_username

            if is_our_post:
                # Original post — use thread-level stats (they ARE our stats)
                if thread_data.get("removed_by_category"):
                    detect_count, was_set = _http_detect_deletion(post_id, "removed", 2)
                    if was_set:
                        removed += 1
                        if not quiet:
                            print(f"REMOVED (thread) [{post_id}] (confirmed after {detect_count} detections)")
                    else:
                        if not quiet:
                            print(f"REMOVAL PENDING (thread) [{post_id}] (detection {detect_count}/2)")
                    continue

                _http_patch_post(post_id, {
                    "upvotes": thread_score,
                    "comments_count": thread_comments,
                    "stamp_engagement_now": True,
                    "stamp_status_checked_now": True,
                    "reset_deletion_detect_count": True,
                })
                updated += 1
                if thread_score != prev_upvotes or thread_comments != prev_comments:
                    changed += 1
                results.append({"id": post_id, "score": thread_score, "thread_score": thread_score,
                                "thread_comments": thread_comments, "title": thread_title,
                                "_comments_written": thread_comments})
            else:
                # Comment without permalink — we can't get comment-specific stats
                # Only update thread engagement metadata, don't touch upvotes/comments_count
                # Check if our comment is still visible by searching response[1]
                our_found = False
                our_removed = False
                for child in _listing_children(response[1]):
                    cd = (child.get("data") or {}) if isinstance(child, dict) else {}
                    if not our_username or (cd.get("author") or "").lower() != our_username:
                        continue
                    # The first top-level comment written by us decides the outcome.
                    our_found = True
                    if cd.get("body") == "[removed]":
                        our_removed = True
                    elif cd.get("body") in ("[deleted]",) or cd.get("author") == "[deleted]":
                        our_removed = True
                    else:
                        # Found our comment with stats — update
                        score = cd.get("score", 0)
                        _http_patch_post(post_id, {
                            "upvotes": score,
                            "stamp_engagement_now": True,
                            "stamp_status_checked_now": True,
                            "reset_deletion_detect_count": True,
                        })
                        updated += 1
                        # No comments_count write in this branch (no-permalink
                        # comments lack per-comment reply visibility), so
                        # change detection is score-only and the skip block
                        # reads _comments_written=None and ignores comments.
                        if score != prev_upvotes:
                            changed += 1
                        results.append({"id": post_id, "score": score, "thread_score": thread_score,
                                        "thread_comments": thread_comments, "title": thread_title,
                                        "_comments_written": None})
                    break

                if our_removed:
                    detect_count, was_set = _http_detect_deletion(post_id, "removed", 2)
                    if was_set:
                        removed += 1
                        if not quiet:
                            print(f"REMOVED (no permalink) [{post_id}] (confirmed after {detect_count} detections)")
                    else:
                        if not quiet:
                            print(f"REMOVAL PENDING (no permalink) [{post_id}] (detection {detect_count}/2)")
                elif not our_found:
                    # Comment not in top-level replies — just update checked timestamp
                    _http_patch_post(post_id, {"stamp_status_checked_now": True})
                    if not quiet:
                        print(f"SKIP (no permalink, comment not in top-level) [{post_id}]")

        # Track whether stats changed for skip optimization. A row counts as
        # "no change" only when BOTH score and comments_count are unchanged
        # since the prior scan. _comments_written = None means this branch
        # didn't write comments_count (no-permalink case), so we don't gate
        # the skip on comments — score-only. PATCH /api/v1/posts/[id] supports
        # `scan_no_change_delta` to bump by +1, or `scan_no_change_count=0`
        # to reset.
        if results and results[-1]["id"] == post_id:
            new_score = results[-1]["score"]
            new_comments = results[-1].get("_comments_written")
            score_unchanged = (new_score == prev_upvotes)
            comments_unchanged = (new_comments is None or new_comments == prev_comments)
            if score_unchanged and comments_unchanged:
                _http_patch_post(post_id, {"scan_no_change_delta": 1})
            else:
                _http_patch_post(post_id, {"scan_no_change_count": 0})

        # Pacing now happens at top of loop (before API call) via _reddit_pacing_sleep().

    progress.done("reddit", len(posts),
                  updated=updated, changed=changed, deleted=deleted, removed=removed,
                  errors=errors, skipped=skipped, skipped_fresh=skipped_fresh)
    if skipped and not quiet:
        print(f" Skipped {skipped} stable posts (2+ scans unchanged, older than 3 days)")
    if skipped_fresh and not quiet:
        print(f" Skipped {skipped_fresh} rows refreshed by Step 1 within 4h")
    return {"total": total, "updated": updated, "changed": changed,
            "deleted": deleted, "removed": removed,
            "errors": errors,
            "errors_404": errors_404,
            "errors_rate_limited": errors_rate_limited,
            "errors_empty": errors_empty,
            "errors_other": errors_other,
            "skipped": skipped, "skipped_fresh": skipped_fresh, "results": results}
797
+
798
+
799
def refresh_reddit_resurrect(db, user_agent, config=None, quiet=False, days=60):
    """Re-check Reddit posts marked 'deleted'/'removed' in the last N days.

    If the post/comment is now visible with real content, flip status back to 'active'.
    One live detection is enough (bias: don't falsely mark deleted).

    Args:
        db: Unused; kept so existing callers need not change.
        user_agent: User-Agent string handed to ``fetch_reddit_json``.
        config: Optional settings mapping. ``accounts.reddit.username`` is
            used to recognise our own threads and comments.
        quiet: When true, suppress the RESURRECTED prints.
        days: Look-back window passed to ``_http_list_reddit_dead_posts``.

    Returns:
        dict with ``total``, ``resurrected``, ``still_dead``, ``errors`` and
        the per-cause counters ``errors_404``, ``errors_rate_limited``,
        ``errors_empty``, ``errors_malformed``, ``errors_other``.
    """
    config = config or {}
    # A blank username must never match: a thread with no author field would
    # otherwise look like "our post" and be resurrected by mistake.
    our_username = (
        ((config.get("accounts") or {}).get("reddit") or {}).get("username") or ""
    ).lower()

    def _listing_children(listing):
        """Return the ``data.children`` list of a listing dict, else []."""
        if not isinstance(listing, dict):
            return []
        return (listing.get("data") or {}).get("children") or []

    # 2026-05-12: read via /api/v1/posts. `db` is ignored.
    posts_rows = _http_list_reddit_dead_posts(days)
    posts = [
        (r.get("id"), r.get("our_url"), r.get("thread_url"), r.get("status"))
        for r in posts_rows
    ]

    total = resurrected = still_dead = errors = 0
    errors_404 = errors_rate_limited = errors_empty = errors_malformed = errors_other = 0
    dead_markers = ("[deleted]", "[removed]")

    for post_id, our_url, _thread_url, prev_status in posts:
        total += 1

        if not our_url or not our_url.startswith("http"):
            # Bucketed as "other", matching refresh_reddit's handling of the
            # same condition.
            errors += 1
            errors_other += 1
            continue

        has_comment_id = bool(
            re.search(r"/comment/[a-z0-9]+", our_url) or
            re.search(r"/comments/[a-z0-9]+/[^/]+/[a-z0-9]+", our_url)
        )

        json_url = re.sub(r"www\.reddit\.com", "old.reddit.com", our_url).rstrip("/") + ".json"

        _reddit_pacing_sleep()
        status, response = fetch_reddit_json(json_url, user_agent)
        if status == "not_found":
            # Still gone: counted as still_dead rather than as an error.
            still_dead += 1
            _http_patch_post(post_id, {"stamp_status_checked_now": True})
            continue
        if status == "rate_limited":
            errors += 1
            errors_rate_limited += 1
            continue
        if status == "empty":
            errors += 1
            errors_empty += 1
            continue
        if status == "error":
            errors += 1
            errors_other += 1
            continue
        if not isinstance(response, list) or len(response) < 2:
            errors += 1
            errors_malformed += 1
            continue

        # An empty children list must not raise; fall back to an empty thread.
        thread_children = _listing_children(response[0])
        first_child = thread_children[0] if thread_children else {}
        thread_data = (first_child.get("data") or {}) if isinstance(first_child, dict) else {}
        thread_author = (thread_data.get("author") or "").lower()

        is_live = False

        if has_comment_id:
            children = _listing_children(response[1])
            comment_data = (
                children[0].get("data")
                if children and isinstance(children[0], dict)
                else None
            )
            if comment_data:
                body = comment_data.get("body") or ""
                author = comment_data.get("author") or ""
                if body not in dead_markers and author != "[deleted]" and body.strip():
                    is_live = True
        elif our_username and thread_author == our_username:
            # Our own thread: live unless it is flagged removed or its text
            # has been replaced by a tombstone marker.
            if not thread_data.get("removed_by_category") and thread_data.get("selftext") not in ("[removed]", "[deleted]"):
                is_live = True
        else:
            for child in _listing_children(response[1]):
                cd = (child.get("data") or {}) if isinstance(child, dict) else {}
                if not our_username or (cd.get("author") or "").lower() != our_username:
                    continue
                # The first top-level comment written by us decides the outcome.
                body = cd.get("body") or ""
                if body not in dead_markers and body.strip():
                    is_live = True
                break

        if is_live:
            _http_patch_post(post_id, {
                "status": "active",
                "reset_deletion_detect_count": True,
                "stamp_status_checked_now": True,
                "stamp_resurrected_now": True,
            })
            resurrected += 1
            if not quiet:
                print(f"RESURRECTED [{post_id}] ({prev_status} -> active): {our_url}", flush=True)
        else:
            still_dead += 1
            _http_patch_post(post_id, {"stamp_status_checked_now": True})

        # Pacing now happens at top of loop (before API call) via _reddit_pacing_sleep().

    return {"total": total, "resurrected": resurrected, "still_dead": still_dead, "errors": errors,
            "errors_404": errors_404, "errors_rate_limited": errors_rate_limited,
            "errors_empty": errors_empty, "errors_malformed": errors_malformed,
            "errors_other": errors_other}
900
+
901
+
902
def refresh_moltbook(db, api_key, quiet=False):
    """Refresh engagement stats for active Moltbook rows.

    Rows come from ``_http_list_moltbook_active_posts()``. A row whose URL
    carries a comment fragment is looked up in the post's comment list;
    otherwise the post itself is fetched. Results are written with
    ``_http_patch_post``; missing or deleted items go through the two-strike
    ``_http_detect_deletion`` counter. The scan stops at the first
    ``MoltbookRateLimitedError``.

    Args:
        db: Unused by this function.
        api_key: Moltbook API key. When falsy nothing is fetched.
        quiet: When true, suppress status prints.

    Returns:
        ``{"skipped": True, "reason": "no_api_key"}`` when ``api_key`` is
        falsy; otherwise a dict with ``total``, ``updated``, ``deleted``,
        ``errors``, ``skipped`` and the per-row ``results`` list.
    """
    if not api_key:
        return {"skipped": True, "reason": "no_api_key"}

    posts = _http_list_moltbook_active_posts()

    total = updated = deleted = errors = skipped = 0
    results = []
    rate_limited = False
    full_uuid_re = re.compile(r"[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}")

    def _strike(row_id, label):
        """Register one deletion detection; return 1 if the row was flipped.

        ``label`` selects the printed reason; None records the strike
        without printing.
        """
        detect_count, status_set = _http_detect_deletion(row_id, "deleted", threshold=2)
        if status_set:
            if label and not quiet:
                print(f"DELETED ({label}) [{row_id}] (confirmed after {detect_count} detections)")
            return 1
        if label and not quiet:
            print(f"DELETION PENDING ({label}) [{row_id}] (detection {detect_count}/2)")
        return 0

    for post in posts:
        if total and total % 50 == 0:
            progress.tick("moltbook", total, len(posts),
                          updated=updated, deleted=deleted,
                          errors=errors, skipped=skipped)
        if rate_limited:
            break
        total += 1
        post_id, our_url, thread_url = post["id"], post["our_url"], post.get("thread_url")
        prev_upvotes, prev_comments = post.get("upvotes"), post.get("comments_count")
        no_change = post.get("scan_no_change_count") or 0
        posted_at = _parse_dt(post.get("posted_at"))

        if no_change >= 3 and posted_at:
            pa = posted_at.replace(tzinfo=timezone.utc) if posted_at.tzinfo is None else posted_at
            if datetime.now(timezone.utc) - pa > timedelta(days=3):
                skipped += 1
                continue

        # A missing (None) or non-string our_url cannot be parsed below;
        # count it as an error instead of crashing the whole scan.
        if not isinstance(our_url, str):
            errors += 1
            continue

        # Extract post UUID and optional comment UUID from our_url
        # Format: https://www.moltbook.com/post/{post_uuid}#{comment_uuid}
        # Also handles bare fragments like "#abc123" by falling back to thread_url
        effective_url = our_url
        if not our_url.startswith("http"):
            # Bare fragment (e.g. "#f504d6fb") - reconstruct from thread_url
            if not (thread_url and thread_url.startswith("http")):
                errors += 1
                continue
            thread_uuids = full_uuid_re.findall(thread_url)
            if thread_uuids:
                effective_url = f"https://www.moltbook.com/post/{thread_uuids[0]}{our_url}"
            elif re.search(r"/post/([0-9a-f-]+)", thread_url):
                # thread_url might have short UUID too - extract what we can
                effective_url = thread_url + our_url  # append fragment
            else:
                errors += 1
                continue

        uuids = full_uuid_re.findall(effective_url)
        if not uuids:
            # Try short UUID format: /post/{short_id}
            if re.search(r"/post/([0-9a-f]{7,})", effective_url):
                # Short UUID - API won't accept it, skip gracefully
                _http_patch_post(post_id, {"stamp_status_checked_now": True})
                continue
            errors += 1
            continue

        post_uuid = uuids[0]
        comment_uuid = None
        if "#" in effective_url and len(uuids) >= 2:
            comment_uuid = uuids[1]
        elif "#" in effective_url:
            # Comment UUID might be short (not full UUID) - extract after #
            fragment = effective_url.split("#")[-1]
            # Strip "comment-" prefix if present
            fragment = re.sub(r'^comment-', '', fragment)
            if fragment and fragment != post_uuid and re.match(r'^[0-9a-f-]{5,}$', fragment):
                comment_uuid = fragment

        if comment_uuid is not None:
            # Fetch comment-specific stats via comments endpoint
            try:
                data = fetch_moltbook_json(
                    f"https://www.moltbook.com/api/v1/posts/{post_uuid}/comments?sort=new&limit=100",
                    api_key=api_key,
                )
            except MoltbookRateLimitedError as e:
                if not quiet:
                    print(f"Moltbook rate-limited for {int(e.reset_seconds)}s, stopping scan", flush=True)
                rate_limited = True
                continue
            except MoltbookNotFoundError:
                # Post deleted on Moltbook - use detection counter
                deleted += _strike(post_id, "Moltbook 404")
                continue
            if not data or not data.get("success"):
                errors += 1
                continue

            # Find our comment by UUID - try multiple matching strategies
            our_comment = None
            # Strip "comment-" prefix for matching
            clean_uuid = re.sub(r'^comment-', '', comment_uuid)
            for c in data.get("comments", []):
                cid = c.get("id", "")
                # Match by: full UUID, starts-with (8 chars), or contains
                if cid == clean_uuid or cid.startswith(clean_uuid[:8]) or clean_uuid in cid:
                    our_comment = c
                    break

            if not our_comment:
                has_more = data.get("has_more", False)
                total_comments = data.get("count", 0)
                if has_more or total_comments > 100:
                    # Comment is buried beyond first page — not an error, just unreachable
                    _http_patch_post(post_id, {"stamp_status_checked_now": True,
                                               "reset_deletion_detect_count": True})
                else:
                    # Post has few comments but ours is missing — likely deleted
                    deleted += _strike(post_id, "Moltbook comment missing")
                continue

            if our_comment.get("is_deleted"):
                deleted += _strike(post_id, None)
                continue

            # Comment-specific engagement
            comment_upvotes = our_comment.get("upvotes", 0)
            # Server's `reply_count` is stale/zero on many comments; len(replies) is authoritative.
            replies_list = our_comment.get("replies") or []
            comment_replies = max(our_comment.get("reply_count") or 0, len(replies_list))
            verification = our_comment.get("verification_status", "unknown")

            patch = {"upvotes": comment_upvotes, "comments_count": comment_replies,
                     "stamp_engagement_now": True, "stamp_status_checked_now": True,
                     "reset_deletion_detect_count": True}
            if comment_upvotes == prev_upvotes and comment_replies == prev_comments:
                patch["scan_no_change_delta"] = 1
            else:
                patch["scan_no_change_count"] = 0
            _http_patch_post(post_id, patch)
            updated += 1
            results.append({"id": post_id, "upvotes": comment_upvotes,
                            "replies": comment_replies, "verification": verification})
        else:
            # Original post - fetch post-level stats
            try:
                data = fetch_moltbook_json(
                    f"https://www.moltbook.com/api/v1/posts/{post_uuid}",
                    api_key=api_key,
                )
            except MoltbookRateLimitedError as e:
                if not quiet:
                    print(f"Moltbook rate-limited for {int(e.reset_seconds)}s, stopping scan", flush=True)
                rate_limited = True
                continue
            except MoltbookNotFoundError:
                # Post deleted on Moltbook - use detection counter
                deleted += _strike(post_id, "Moltbook 404")
                continue
            if not data or not data.get("success"):
                errors += 1
                continue

            post_data = data.get("post", {})
            if post_data.get("is_deleted"):
                deleted += _strike(post_id, None)
                continue

            upvotes = post_data.get("upvotes", 0)
            comment_count = post_data.get("comment_count", post_data.get("comments_count", 0))
            score = post_data.get("score", 0)
            views = post_data.get("views", 0)

            patch = {"upvotes": upvotes, "comments_count": comment_count, "views": views,
                     "stamp_engagement_now": True, "stamp_status_checked_now": True,
                     "reset_deletion_detect_count": True}
            if upvotes == prev_upvotes and comment_count == prev_comments:
                patch["scan_no_change_delta"] = 1
            else:
                patch["scan_no_change_count"] = 0
            _http_patch_post(post_id, patch)
            updated += 1
            results.append({"id": post_id, "upvotes": upvotes, "score": score,
                            "comments": comment_count})

    progress.done("moltbook", len(posts),
                  updated=updated, deleted=deleted,
                  errors=errors, skipped=skipped)
    if skipped and not quiet:
        print(f" Skipped {skipped} stable Moltbook posts (3+ scans unchanged, older than 3 days)")
    return {"total": total, "updated": updated, "deleted": deleted, "errors": errors,
            "skipped": skipped, "results": results}
1117
+
1118
+
1119
def _detect_minimized_github_comments(db, posts, quiet=False):
    """Pre-pass: batch-query GitHub GraphQL for isMinimized on our active
    comments and flip status='deleted' on matches.

    Why this exists: REST `repos/{o}/{r}/issues/comments/{id}` returns 200
    for a comment that's been hidden via "Hide -> low quality / off-topic /
    spam". The reactions count zeroes out, the body is unchanged, and the
    REST loop happily updates engagement as if the comment were still
    visible. The antiwork/gumroad block on 2026-05-01 was found via inbound
    notification email, not via our own pipeline. GraphQL exposes the
    moderation state via `Issue.comments.nodes[].isMinimized`.

    Cost is cheap: one GraphQL query fetches all comments on a thread (1
    rate-limit point), and aliasing batches ~10 threads per query at the
    same 1-point cost. Three thousand active threads -> ~300 points, well
    inside the 5000/hr ceiling.

    Defensive on purpose. Any failure here logs and returns; the REST loop
    that follows is the established hot path and must not be blocked by a
    GraphQL outage.

    Args:
        db: Unused by this function.
        posts: Iterable of row dicts; ``our_url`` and ``id`` are read. Rows
            whose ``our_url`` is not an issue/PR comment permalink are
            ignored.
        quiet: When true, suppress status prints.

    Returns:
        Number of rows passed to ``_http_mark_minimized``.
    """
    import subprocess
    from collections import defaultdict

    BATCH = 10
    comment_re = re.compile(
        r"https?://github\.com/([^/]+)/([^/]+)/(?:issues|pull)/(\d+)#issuecomment-(\d+)"
    )

    # Group: (owner, repo, number) -> [(post_id, comment_id), ...]
    threads = defaultdict(list)
    for post in posts:
        m = comment_re.match(post.get("our_url") or "")
        if not m:
            continue
        thread_key = (m.group(1), m.group(2), int(m.group(3)))
        threads[thread_key].append((post["id"], int(m.group(4))))

    if not threads:
        return 0

    keys = list(threads.keys())
    minimized = 0
    failures = 0

    for batch_start in range(0, len(keys), BATCH):
        batch = keys[batch_start:batch_start + BATCH]
        parts = []
        for i, (owner, repo, number) in enumerate(batch):
            # json.dumps yields a quoted, escaped string literal, so a quote
            # or backslash in a URL segment cannot break out of the query.
            parts.append(
                f't{i}: repository(owner: {json.dumps(owner)}, name: {json.dumps(repo)}) {{ '
                f'issueOrPullRequest(number: {number}) {{ '
                f'... on Issue {{ comments(first: 100) {{ nodes {{ databaseId isMinimized minimizedReason }} }} }} '
                f'... on PullRequest {{ comments(first: 100) {{ nodes {{ databaseId isMinimized minimizedReason }} }} }} '
                f'}} }}'
            )
        query = "{ " + " ".join(parts) + " rateLimit { remaining } }"
        try:
            proc = subprocess.run(
                ["gh", "api", "graphql", "-f", f"query={query}"],
                capture_output=True, text=True, timeout=30,
            )
        except Exception as e:
            failures += 1
            if not quiet:
                print(f" github-minimize: graphql exec failed batch {batch_start}: {e}",
                      flush=True)
            continue
        if proc.returncode != 0:
            failures += 1
            if not quiet:
                print(f" github-minimize: graphql rc={proc.returncode} batch {batch_start}: "
                      f"{(proc.stderr or '')[:200]}", flush=True)
            # NOTE(review): gh appears to exit non-zero when the response has
            # an `errors` array (e.g. one repository in the batch no longer
            # resolves) while still printing the body, with partial `data`,
            # to stdout. Fall through and use whatever parses instead of
            # dropping the other threads in the batch — confirm against the
            # `gh api` manual.
        try:
            data = json.loads(proc.stdout).get("data", {}) or {}
        except Exception:
            if proc.returncode == 0:
                # Already counted above when the exit status was non-zero.
                failures += 1
            continue

        for i, key in enumerate(batch):
            node = data.get(f"t{i}") or {}
            iop = node.get("issueOrPullRequest")
            if not iop:
                continue
            comments = (iop.get("comments") or {}).get("nodes") or []
            # Skip null entries in the node list rather than crashing on them.
            min_set = {c.get("databaseId"): c.get("minimizedReason")
                       for c in comments if c and c.get("isMinimized")}
            if not min_set:
                continue
            for post_id, cid in threads[key]:
                if cid not in min_set:
                    continue
                reason = min_set[cid] or ""
                _http_mark_minimized(post_id, reason)
                minimized += 1
                if not quiet:
                    owner, repo, number = key
                    print(f"MINIMIZED [{post_id}] {owner}/{repo}#{number} reason={reason}",
                          flush=True)

    if not quiet:
        rl_note = f", failures={failures}" if failures else ""
        print(f" github-minimize: flipped {minimized} hidden comments "
              f"across {len(threads)} threads{rl_note}", flush=True)
    return minimized
1224
+
1225
+
1226
# Per-process memo of repo-level lookups made by _classify_github_404.
# Keys are "owner/repo" lowercased; each value is a small dict carrying a
# "state" string and, for live repos, a "has_issues" boolean.
_REPO_STATE_CACHE_US = {}
1227
+
1228
+
1229
def _classify_github_404(owner, repo, number, comment_id, quiet=False):
    """Disambiguate a REST 404 on a GitHub issue/PR comment.

    Args:
        owner: Repository owner login as it appears in the comment URL.
        repo: Repository name as it appears in the comment URL.
        number: Issue / pull-request number (int or numeric string).
        comment_id: REST ``databaseId`` of our comment (int or numeric string).
        quiet: When true, suppress the diagnostic print on a failed repo probe.

    Returns one of:
      - 'repo_gone'        : `repos/{o}/{r}` itself 404s.
      - 'feature_disabled' : repo is live but reports has_issues=false. This
                             is decided from the repo payload alone, BEFORE
                             the issue probe runs.
      - 'issue_deleted'    : repo is live but `repos/{o}/{r}/issues/{n}` is
                             404/410, or GraphQL cannot see the thread.
      - 'transient'        : GraphQL lists our comment and it is not
                             minimized, so the REST 404 was spurious; the
                             caller must not count it as a strike.
      - 'comment_deleted'  : GraphQL returned the complete comment list (no
                             further page) without our comment, or lists it
                             as minimized.
      - 'unknown'          : a follow-up call failed, an id was not numeric,
                             or the thread has more than 100 comments and ours
                             was not in the first page; the caller should fall
                             back to count-based detection.

    Repo metadata is memoised per process in ``_REPO_STATE_CACHE_US`` so at
    most one repo probe is made per live/gone repository. Inconclusive repo
    probes are deliberately NOT memoised: a single rate-limit or network blip
    must not pin every later comment in that repo to 'unknown' for the rest
    of the run.

    NOTE(review): has_issues=false short-circuits pull-request threads too,
    because this function is not told whether ``number`` is an issue or a PR.
    Confirm that is intended.
    """
    import subprocess

    key = f"{owner.lower()}/{repo.lower()}"
    cached_repo = _REPO_STATE_CACHE_US.get(key)
    if cached_repo is None:
        try:
            proc = subprocess.run(
                ["gh", "api", f"repos/{owner}/{repo}"],
                capture_output=True, text=True, timeout=20,
            )
        except Exception as e:
            if not quiet:
                print(f" github-classify: repo check failed {owner}/{repo}: {e}",
                      flush=True)
            return "unknown"
        if proc.returncode != 0:
            err = ((proc.stderr or "") + (proc.stdout or "")).lower()
            if "not found" not in err and "http 404" not in err:
                # Inconclusive (403, throttle, 5xx, ...): answer 'unknown' for
                # this call only, mirroring the exception path above.
                return "unknown"
            cached_repo = {"state": "repo_gone"}
        else:
            try:
                data = json.loads(proc.stdout or "{}")
            except Exception:
                data = {}
            if not isinstance(data, dict):
                data = {}
            cached_repo = {
                "state": "live",
                "has_issues": bool(data.get("has_issues", True)),
            }
        _REPO_STATE_CACHE_US[key] = cached_repo

    if cached_repo["state"] == "repo_gone":
        return "repo_gone"
    if cached_repo["state"] == "unknown":
        # Only reachable for entries written by other code paths; kept so a
        # pre-existing 'unknown' entry is still honoured.
        return "unknown"
    if not cached_repo.get("has_issues", True):
        return "feature_disabled"

    # Repo is live. Probe the thread over REST first; 404 and 410 both mean
    # the thread is gone.
    try:
        proc = subprocess.run(
            ["gh", "api", f"repos/{owner}/{repo}/issues/{number}"],
            capture_output=True, text=True, timeout=20,
        )
    except Exception:
        return "unknown"
    if proc.returncode != 0:
        err = ((proc.stderr or "") + (proc.stdout or "")).lower()
        if ("not found" in err or "http 404" in err
                or "http 410" in err or "this issue was deleted" in err):
            return "issue_deleted"
        # Could be 403/permissions; fall through to GraphQL to be sure.

    # Verify the specific comment via GraphQL. Only the first page (up to 100
    # comments) is fetched; `pageInfo.hasNextPage` tells us whether absence
    # from that page is conclusive. owner/name/number travel as GraphQL
    # variables so characters from the URL can never alter the query text.
    query = (
        "query($owner: String!, $name: String!, $number: Int!) { "
        "repository(owner: $owner, name: $name) { "
        "issueOrPullRequest(number: $number) { "
        "... on Issue { comments(first: 100) { "
        "nodes { databaseId isMinimized } "
        "pageInfo { hasNextPage } } } "
        "... on PullRequest { comments(first: 100) { "
        "nodes { databaseId isMinimized } "
        "pageInfo { hasNextPage } } } "
        "} } }"
    )
    try:
        proc = subprocess.run(
            ["gh", "api", "graphql",
             "-f", f"query={query}",
             # -f sends raw strings (no type coercion, no @file expansion).
             "-f", f"owner={owner}",
             "-f", f"name={repo}",
             # -F coerces the integer literal to a JSON number for Int!.
             "-F", f"number={int(number)}"],
            capture_output=True, text=True, timeout=30,
        )
        if proc.returncode != 0:
            return "unknown"
        data = json.loads(proc.stdout).get("data", {}) or {}
    except Exception:
        return "unknown"

    iop = ((data.get("repository") or {}).get("issueOrPullRequest")) or {}
    if not iop:
        # Either repo missing in GraphQL (shouldn't happen if REST said live)
        # or issue/PR not visible. Treat as issue_deleted equivalent.
        return "issue_deleted"

    try:
        cid_int = int(comment_id)
    except (TypeError, ValueError):
        return "unknown"

    connection = iop.get("comments") or {}
    for node in connection.get("nodes") or []:
        if not node:
            # Tolerate null entries in the nodes list.
            continue
        if int(node.get("databaseId") or 0) != cid_int:
            continue
        if node.get("isMinimized"):
            # Pre-pass should already have flipped this; defer to its path.
            return "comment_deleted"
        return "transient"

    # Not in the first page. If more pages exist we cannot be sure, so let
    # count-based detection take over.
    if (connection.get("pageInfo") or {}).get("hasNextPage"):
        return "unknown"
    return "comment_deleted"
1356
+
1357
+
1358
def _refresh_github_comment(post, comment_url_re, position, quiet):
    """Refresh a single GitHub comment row and report what happened.

    Args:
        post: Row dict; reads ``id``, ``our_url`` and ``reply_count``.
        comment_url_re: Compiled pattern capturing owner, repo, thread number
            and comment id from ``our_url``.
        position: ``"<n>/<total>"`` label used in the rate-limit log line.
        quiet: When true, suppress per-row log lines.

    Returns:
        ``(outcome, result)`` where ``outcome`` is one of ``"updated"``,
        ``"deleted"``, ``"repo_gone"``, ``"transient"``, ``"pending"`` (404
        recorded but below the flip threshold) or ``"errors"``, and ``result``
        is the engagement summary dict for rows with any reactions/replies,
        else ``None``.
    """
    import subprocess

    post_id, our_url = post["id"], post.get("our_url")

    m = comment_url_re.match(our_url or "")
    if not m:
        return "errors", None
    owner, repo, number, comment_id = m.groups()

    try:
        proc = subprocess.run(
            ["gh", "api", f"repos/{owner}/{repo}/issues/comments/{comment_id}"],
            capture_output=True, text=True, timeout=30,
        )
    except Exception:
        return "errors", None

    if proc.returncode != 0:
        err_text = (proc.stderr or "") + (proc.stdout or "")
        err_lower = err_text.lower()
        # "rate limit" also matches "secondary rate limit".
        if "rate limit" in err_lower or "abuse detection" in err_lower:
            if not quiet:
                print(f" github: rate-limited at {position}, sleeping 60s", flush=True)
            time.sleep(60)
            return "errors", None
        if not ("Not Found" in err_text or "HTTP 404" in err_text
                or "HTTP 410" in err_text):
            return "errors", None

        # Disambiguate the 404. A bare comment 404 means one of:
        #   1. parent repo was deleted (every comment 404s)
        #   2. issue/PR thread was deleted (every comment under it 404s)
        #   3. repo has has_issues=false (collateral, not moderation)
        #   4. our specific comment was deleted or hidden
        #   5. transient GitHub error returning 404 for a live comment
        #      (HOLYKEYZ case, 2026-05-09: REST gave 404 twice but the
        #      comment was alive in both REST and GraphQL on re-check).
        # Categories 1-3 are not moderation strikes and are tagged
        # 'repo_gone'; category 5 resets the detection counter.
        cls = _classify_github_404(owner, repo, number, comment_id, quiet=quiet)
        if cls in ("repo_gone", "issue_deleted", "feature_disabled"):
            _http_patch_post(post_id, {"status": "repo_gone",
                                       "stamp_status_checked_now": True})
            if not quiet:
                print(f"REPO_GONE (github {cls}) [{post_id}] {owner}/{repo}#{number}", flush=True)
            return "repo_gone", None
        if cls == "transient":
            # REST said 404 but GraphQL confirms our comment is alive and not
            # minimized. False positive; reset the strike counter.
            _http_patch_post(post_id, {"reset_deletion_detect_count": True,
                                       "stamp_status_checked_now": True})
            if not quiet:
                print(f"TRANSIENT-404 (github) [{post_id}] {owner}/{repo}#{number} "
                      f"comment {comment_id} alive in GraphQL, resetting count",
                      flush=True)
            return "transient", None

        # 'comment_deleted' flips at threshold 2; 'unknown' only bumps the
        # counter (threshold 10**9 is effectively never reached).
        threshold = 2 if cls == "comment_deleted" else 10 ** 9
        _detect_count, status_set = _http_detect_deletion(
            post_id, "deleted", threshold=threshold)
        if status_set:
            if not quiet:
                print(f"DELETED (github 404 + graphql confirmed) [{post_id}]", flush=True)
            return "deleted", None
        return "pending", None

    try:
        data = json.loads(proc.stdout)
    except Exception:
        return "errors", None

    reactions = data.get("reactions") or {}
    total_reactions = int(reactions.get("total_count") or 0)

    # reply_count arrives with the row, so no per-post COUNT round trip is
    # needed.
    reply_count = int(post.get("reply_count") or 0)

    _http_patch_post(post_id, {
        "upvotes": total_reactions,
        "comments_count": reply_count,
        "stamp_engagement_now": True,
        "stamp_status_checked_now": True,
        "reset_deletion_detect_count": True,
    })

    result = None
    if total_reactions or reply_count:
        result = {
            "id": post_id,
            "reactions": total_reactions,
            "replies": reply_count,
            "url": our_url,
        }
    return "updated", result


def refresh_github(db, quiet=False, limit=None):
    """Fetch engagement on our GitHub issue/PR comments via `gh api`.

    Stores reactions.total_count in posts.upvotes and the row's reply_count
    in posts.comments_count.

    Runs a GraphQL `isMinimized` pre-pass before the REST loop so hidden
    comments are flipped and drop out of the second row select.

    Args:
        db: Passed through to the minimized-comment pre-pass only.
        quiet: When true, suppress progress and per-row log lines.
        limit: Forwarded to the active-post listing helper.

    Returns:
        Dict with ``total``, ``updated``, ``deleted``, ``repo_gone``,
        ``transient_skipped``, ``errors`` counters and a ``results`` list of
        rows that had any reactions or replies. Rows whose 404 was recorded
        but has not reached the flip threshold are counted in ``total`` only.
    """
    posts = _http_list_github_active_posts(limit)

    # Pre-pass: flag minimized (hidden) comments before REST. Wrapped
    # defensively, a GraphQL flake must not block the REST hot path.
    try:
        _detect_minimized_github_comments(db, posts, quiet=quiet)
    except Exception as e:
        if not quiet:
            print(f" github-minimize: pre-pass crashed, skipping: {e}", flush=True)
    # Re-select after the pre-pass so flipped rows drop out of the REST loop.
    posts = _http_list_github_active_posts(limit)

    tally = {"updated": 0, "deleted": 0, "errors": 0,
             "repo_gone": 0, "transient": 0, "pending": 0}
    results = []
    # Capture issue/PR number so we can re-verify comment state on 404.
    comment_url_re = re.compile(
        r"https?://github\.com/([^/]+)/([^/]+)/(?:issues|pull)/(\d+)#issuecomment-(\d+)"
    )

    total = 0
    for post in posts:
        total += 1
        outcome, result = _refresh_github_comment(
            post, comment_url_re, f"{total}/{len(posts)}", quiet)
        tally[outcome] += 1
        if result is not None:
            results.append(result)
        if outcome == "updated":
            # Pacing between successful REST reads.
            time.sleep(0.1)

        # Report every 100th row regardless of how that row ended, so a run
        # of errors or 404s cannot silence progress reporting.
        if total % 100 == 0:
            progress.tick("github", total, len(posts),
                          updated=tally["updated"], deleted=tally["deleted"],
                          errors=tally["errors"])
            if not quiet:
                print(f" github: {total}/{len(posts)} processed "
                      f"(updated={tally['updated']}, deleted={tally['deleted']}, "
                      f"repo_gone={tally['repo_gone']}, transient={tally['transient']}, "
                      f"errors={tally['errors']})",
                      flush=True)

    progress.done("github", len(posts),
                  updated=tally["updated"], deleted=tally["deleted"],
                  errors=tally["errors"])
    if not quiet:
        print(f" github: done (updated={tally['updated']}, deleted={tally['deleted']}, "
              f"repo_gone={tally['repo_gone']}, transient={tally['transient']}, "
              f"errors={tally['errors']})", flush=True)
    return {"total": total, "updated": tally["updated"], "deleted": tally["deleted"],
            "repo_gone": tally["repo_gone"], "transient_skipped": tally["transient"],
            "errors": tally["errors"], "results": results}
1514
+
1515
+
1516
def _fxtwitter_api_url(status_url, tweet_id):
    """Return the fxtwitter API URL for ``tweet_id``.

    The username is taken from an x.com or twitter.com status URL; when none
    can be extracted the placeholder ``i`` is used.
    """
    m_user = (re.search(r'x\.com/([^/]+)/status', status_url)
              or re.search(r'twitter\.com/([^/]+)/status', status_url))
    username = m_user.group(1) if m_user else 'i'
    return f"https://api.fxtwitter.com/{username}/status/{tweet_id}"


def _fetch_fxtwitter_status(url):
    """Fetch one fxtwitter payload, retrying once after 2s on an empty reply.

    An HTTP 404 is converted into a payload instead of raising. fxtwitter's
    own 404 body is preserved when present, because a tombstone body marks a
    tweet that is ALIVE but unreadable to the guest API. A synthetic
    null-tweet 404 is used only when the body was empty.

    Returns:
        The payload dict, or ``None`` when both attempts came back empty.
    """
    for attempt in range(2):
        if attempt:
            time.sleep(2)
        try:
            data = fetch_json(url)
        except HttpNotFoundError as e:
            data = e.body or {"code": 404, "tweet": None}
        if data:
            return data
    return None


def refresh_twitter(db, config=None, quiet=False, audit_mode=False):
    """Fetch Twitter/X stats via fxtwitter API (no browser needed).

    Two cadences split by post age so the per-6h job and the daily audit don't
    fight over the same column:

    Per-6h (audit_mode=False): hot tier, posts younger than 7 days, gated at
    5h staleness. Hit by stats-twitter every 6 hours so each fresh tweet is
    polled ~4x per day. Deletion detection runs here too so a deleted hot
    tweet is caught within hours instead of waiting on the daily audit.

    Daily audit (audit_mode=True): cold tier, posts older than 7 days. Hit
    by audit-twitter at 04:13. Stable-skip (3+ unchanged scans + posted_at
    older than 5 days) keeps the long tail cheap; deletion detection
    confirms removed tweets after 2 strikes.

    Multi-account safety (2026-05-19): the read is scoped to THIS machine's
    Twitter handle so two machines never refresh each other's posts. The
    handle comes from twitter_account.resolve_handle(), which reads
    `AUTOPOSTER_TWITTER_HANDLE` env or `accounts.twitter.handle` in
    config.json.

    Tombstones: fxtwitter is an unauthenticated guest API. For tweets it
    cannot read logged-out it answers code 404 with a tombstone object
    (type="tombstone") rather than a null tweet. Those tweets are alive to a
    logged-in viewer, so tombstones are skipped WITHOUT bumping the deletion
    counter, in both the post pass and the top-reply pass.

    Args:
        db: Accepted for signature compatibility with the orchestrator; every
            read/write here goes through HTTP helpers.
        config: Accepted for signature compatibility; not read here.
        quiet: When true, suppress log lines.
        audit_mode: Select the cold tier (True) or hot tier (False).

    Returns:
        Dict of counters (``total``, ``updated``, ``changed``, ``deleted``,
        ``suspended``, ``errors``, ``skipped``), a ``results`` list and, when
        a handle is configured, a nested ``thread_top_replies`` counter dict.
        ``skipped`` covers both stable-skips and tombstone skips.
    """
    from twitter_account import resolve_handle as _resolve_twitter_handle

    handle = _resolve_twitter_handle()
    if not handle:
        if not quiet:
            print(" twitter: no handle configured (AUTOPOSTER_TWITTER_HANDLE / "
                  "accounts.twitter.handle); skipping refresh", flush=True)
        return {"total": 0, "updated": 0, "changed": 0, "deleted": 0,
                "suspended": 0, "errors": 0, "skipped": 0, "results": []}

    posts = _http_list_twitter_active_posts(
        our_account=handle, audit_mode=audit_mode, stale_hours=5,
    )

    # `updated`: rows fxtwitter answered for and we wrote back.
    # `changed`: subset of `updated` where likes, views or replies moved
    #   since the prior scan.
    # `suspended`: reported for interface compatibility; nothing in this
    #   function increments it.
    total = updated = changed = deleted = suspended = errors = skipped = 0
    # Stable-skips only, so the summary line below does not also count
    # tombstone skips as "stable tweets".
    stable_skipped = 0
    results = []

    for post in posts:
        total += 1
        post_id = post.get("id")
        our_url = post.get("our_url") or ""
        no_change = int(post.get("scan_no_change_count") or 0)
        posted_at_raw = post.get("posted_at")
        prev_upvotes = post.get("upvotes")
        prev_views = post.get("views")
        prev_comments = post.get("comments_count")

        # posted_at arrives as an ISO-8601 string over JSON; parse it so the
        # audit-mode age check below works.
        if isinstance(posted_at_raw, str) and posted_at_raw:
            try:
                posted_at = datetime.fromisoformat(posted_at_raw.replace("Z", "+00:00"))
            except ValueError:
                posted_at = None
        else:
            posted_at = posted_at_raw

        # Stable-skip applies only to the cold tier (audit).
        if audit_mode and no_change >= 3 and posted_at:
            aware = (posted_at.replace(tzinfo=timezone.utc)
                     if posted_at.tzinfo is None else posted_at)
            if datetime.now(timezone.utc) - aware > timedelta(days=5):
                skipped += 1
                stable_skipped += 1
                continue

        id_match = re.search(r'/status/(\d+)', our_url)
        if not id_match:
            errors += 1
            continue

        data = _fetch_fxtwitter_status(
            _fxtwitter_api_url(our_url, id_match.group(1)))
        if not data:
            errors += 1
            continue

        code = data.get("code", 0)
        tweet = data.get("tweet")

        # Tombstone = guest-API blind spot, not a deletion. On 2026-06-05,
        # 5 of 6 twitter strike emails were tombstone rows that were LIVE in
        # the authenticated harness. Skip without bumping the strike counter.
        if isinstance(tweet, dict) and tweet.get("type") == "tombstone":
            skipped += 1
            if not quiet:
                _reason = tweet.get("reason") or "?"
                print(f"TOMBSTONE [{post_id}] reason={_reason} "
                      f"(guest-API blind spot, not a deletion)")
            continue

        if code == 404 or tweet is None:
            # Could be deleted or suspended. The bump + threshold check runs
            # server-side in one call; status_set is True once the threshold
            # is met and the row was flipped.
            detect_count, status_set = _http_detect_deletion(post_id, "deleted", threshold=2)
            if status_set:
                deleted += 1
                if not quiet:
                    print(f"DELETED [{post_id}] (confirmed after {detect_count} detections)")
            elif not quiet:
                print(f"DELETION PENDING [{post_id}] (detection {detect_count}/2)")
            continue

        views = tweet.get("views") or 0
        likes = tweet.get("likes") or 0
        replies = tweet.get("replies") or 0
        retweets = tweet.get("retweets") or 0

        # Computed before the PATCH so the counter instruction rides along in
        # the same call.
        stayed_same = (likes == prev_upvotes
                       and views == prev_views
                       and replies == prev_comments)

        # One PATCH per post: stats + freshness stamps + strike-counter reset
        # (the row did not 404 this round) + the no-change counter update.
        patch_body = {
            "views": int(views),
            "upvotes": int(likes),
            "comments_count": int(replies),
            "stamp_engagement_now": True,
            "stamp_status_checked_now": True,
            "reset_deletion_detect_count": True,
        }
        if stayed_same:
            patch_body["scan_no_change_delta"] = 1
        else:
            patch_body["scan_no_change_count"] = 0
        _http_patch_post(post_id, patch_body)

        # Separate call so a failure here only loses the per-day rollup
        # datapoint, not the parent stats update.
        _http_snapshot_post_views(post_id, views)

        updated += 1
        if not stayed_same:
            changed += 1
        results.append({"id": post_id, "views": views, "likes": likes,
                        "replies": replies, "retweets": retweets})

        # Rate limit: 1 request per second to be safe with fxtwitter.
        time.sleep(1)

        if total % 50 == 0:
            progress.tick("twitter", total, len(posts),
                          updated=updated, changed=changed, deleted=deleted,
                          suspended=suspended, errors=errors, skipped=skipped)

    progress.done("twitter", len(posts),
                  updated=updated, changed=changed, deleted=deleted,
                  suspended=suspended, errors=errors, skipped=skipped)
    if stable_skipped and not quiet:
        print(f" Skipped {stable_skipped} stable tweets (3+ scans unchanged, older than 5 days)")

    # Second pass: refresh the human-top-reply snapshots captured at our
    # post-success time. Same fxtwitter cadence (1 req/s) and 2-strike
    # deletion guard as the post pass. Hot mode only: the benchmark is only
    # meaningful while the parent post is also being polled.
    ttr_total = ttr_updated = ttr_changed = ttr_deleted = ttr_errors = 0
    if not audit_mode:
        # Freshness override for ad-hoc reruns; S4L_TTR_STALE_HOURS=0 forces
        # every active row through this cycle.
        try:
            _ttr_stale = float(os.environ.get("S4L_TTR_STALE_HOURS", "5"))
        except ValueError:
            _ttr_stale = 5.0
        ttr_rows = _http_list_twitter_top_replies_to_refresh(stale_hours=_ttr_stale)
        for row in ttr_rows:
            ttr_total += 1
            ttr_id = row.get("id")
            reply_url = row.get("reply_url") or ""
            reply_tweet_id = row.get("reply_tweet_id")
            prev_reply_likes = row.get("likes")
            prev_reply_views = row.get("views")
            prev_reply_replies = row.get("replies")

            if not reply_tweet_id:
                m = re.search(r"/status/(\d+)", reply_url)
                reply_tweet_id = m.group(1) if m else None
            if not reply_tweet_id:
                ttr_errors += 1
                continue

            data = _fetch_fxtwitter_status(
                _fxtwitter_api_url(reply_url, reply_tweet_id))
            if not data:
                ttr_errors += 1
                continue

            code = data.get("code", 0)
            tweet = data.get("tweet")

            # Same guest-API blind spot as the post pass: a tombstone is not
            # a deletion signal, so it must not bump the strike counter.
            if isinstance(tweet, dict) and tweet.get("type") == "tombstone":
                if not quiet:
                    _reason = tweet.get("reason") or "?"
                    print(f" top_reply TOMBSTONE [{ttr_id}] reason={_reason} "
                          f"(guest-API blind spot, not a deletion)")
                continue

            if code == 404 or tweet is None:
                detect_count, status_set = _http_detect_deletion_top_reply(
                    ttr_id, "deleted", threshold=2,
                )
                if status_set:
                    ttr_deleted += 1
                    if not quiet:
                        print(f" top_reply DELETED [{ttr_id}] "
                              f"(confirmed after {detect_count} detections)")
                continue

            likes = tweet.get("likes") or 0
            views = tweet.get("views") or 0
            replies = tweet.get("replies") or 0
            retweets = tweet.get("retweets") or 0
            stayed_same = (likes == prev_reply_likes
                           and views == prev_reply_views
                           and replies == prev_reply_replies)
            patch_body = {
                "likes": int(likes),
                "views": int(views),
                "replies": int(replies),
                "retweets": int(retweets),
                "stamp_engagement_now": True,
                "stamp_status_checked_now": True,
                "reset_deletion_detect_count": True,
            }
            if stayed_same:
                patch_body["scan_no_change_delta"] = 1
            else:
                patch_body["scan_no_change_count"] = 0
            _http_patch_top_reply(ttr_id, patch_body)
            ttr_updated += 1
            if not stayed_same:
                ttr_changed += 1
            time.sleep(1)

        if not quiet and ttr_total:
            print(f" thread_top_replies: checked={ttr_total} updated={ttr_updated} "
                  f"changed={ttr_changed} deleted={ttr_deleted} errors={ttr_errors}")

    return {"total": total, "updated": updated, "changed": changed,
            "deleted": deleted, "suspended": suspended,
            "errors": errors, "skipped": skipped, "results": results,
            "thread_top_replies": {
                "total": ttr_total, "updated": ttr_updated,
                "changed": ttr_changed, "deleted": ttr_deleted,
                "errors": ttr_errors,
            }}
1847
+
1848
+
1849
def refresh_reddit_replies(db, user_agent, quiet=False):
    """Refresh score and reply count for our Reddit comments in `replies`.

    All pending comment ids are sent to ``batch_fetch_info`` in one call and
    each returned entry is written back with its score and reply count. Rows
    whose ``engagement_updated_at`` is younger than FRESH_WINDOW are skipped.

    Args:
        db: Kept for back-compat with callers; not used here.
        user_agent: Forwarded to ``batch_fetch_info``.
        quiet: When true, suppress log lines.

    Returns:
        Dict with ``total``, ``updated``, ``errors`` and ``skipped_fresh``.
    """
    from reddit_tools import batch_fetch_info, RateLimitedError

    FRESH_WINDOW = timedelta(hours=4)
    now_utc = datetime.now(timezone.utc)

    def _is_fresh(stamp):
        # True when `stamp` parses to a time less than FRESH_WINDOW ago.
        if not stamp:
            return False
        try:
            parsed = datetime.fromisoformat(str(stamp).replace("Z", "+00:00"))
        except Exception:
            return False
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return now_utc - parsed < FRESH_WINDOW

    def _summary(total_n, updated_n, errors_n):
        return {"total": total_n, "updated": updated_n, "errors": errors_n,
                "skipped_fresh": skipped_fresh}

    pending = []
    skipped_fresh = 0
    for record in _http_list_reddit_replies_to_refresh():
        row_id = record.get("id")
        reply_ref = record.get("our_reply_id")
        # Freshness is evaluated before the missing-id check.
        if _is_fresh(record.get("engagement_updated_at")):
            skipped_fresh += 1
            continue
        if not reply_ref:
            continue
        # Stored ids may be bare base-36; the batch call wants the t1_ form.
        if not reply_ref.startswith("t1_"):
            reply_ref = f"t1_{reply_ref}"
        pending.append((row_id, reply_ref))

    total = len(pending)
    if total == 0:
        if not quiet:
            print(f" reddit replies: nothing to refresh ({skipped_fresh} fresh)", flush=True)
        return _summary(0, 0, 0)

    try:
        info = batch_fetch_info([ref for _, ref in pending], user_agent=user_agent)
    except RateLimitedError as e:
        if not quiet:
            print(f" reddit replies: rate-limited (reset in {int(e.reset_in)}s)", flush=True)
        return _summary(total, 0, total)
    except Exception as e:
        if not quiet:
            print(f" reddit replies: batch fetch failed: {e}", flush=True)
        return _summary(total, 0, total)

    updated = errors = 0
    for row_id, reply_ref in pending:
        entry = info.get(reply_ref)
        if not entry:
            errors += 1
            continue

        # Direct replies: each t1 child counts once, each "more" stub adds
        # its advertised count.
        reply_count = 0
        replies_obj = entry.get("replies", "")
        if replies_obj and isinstance(replies_obj, dict):
            for child in replies_obj.get("data", {}).get("children", []):
                kind = child.get("kind")
                if kind == "t1":
                    reply_count += 1
                elif kind == "more":
                    reply_count += child.get("data", {}).get("count", 0)

        _http_patch_reply(row_id, {
            "upvotes": int(entry.get("score") or 0),
            "comments_count": int(reply_count),
            "stamp_engagement_now": True,
        })
        updated += 1

    progress.done("reddit_replies", total, updated=updated, errors=errors)
    if not quiet:
        print(f" reddit replies: {total} checked, {updated} updated, "
              f"{errors} errors, {skipped_fresh} fresh", flush=True)
    return _summary(total, updated, errors)
1934
+
1935
+
1936
def refresh_twitter_threads(db, config=None, quiet=False,
                            max_per_run=1000, stale_hours=20):
    """Poll fxtwitter for parent threads we've commented on and append one
    row to thread_snapshots per successful poll.

    Background: posts.thread_engagement captures one T0 snapshot at
    discovery time, twitter_candidates carries T0+T1 inside the candidate
    lifecycle, but neither covers what happens to the parent thread AFTER
    we post a comment on it. This function closes that gap: it scans every
    active twitter comment whose parent != our_url, dedupes by parent URL,
    polls fxtwitter once per second, and appends a thread_snapshots row.

    Cadence:
      - Hot tier (default): polled every 6h via stats.sh Step 3.5. Threads
        whose latest snapshot is < 5h old are skipped server-side via the
        active-for-stats endpoint.
      - Long tail (default cap): threads where our newest comment is older
        than 30 days are dropped from the candidate set; not worth the
        fxtwitter quota.

    Multi-account safety: read scoped to our_account so two machines
    (@m13v_ and @matt_diak) only refresh the parents of THEIR comments.

    Output to stats.sh log via stdout: "thread_snapshots: X scanned, Y
    written, Z deleted, W errors". DB writes go through HTTP; same lane
    as the rest of the twitter pipeline.

    Args:
        db: Not read by this function.
        config: Not read by this function.
        quiet: When true, suppress the summary output.
        max_per_run: Maximum number of threads polled; a falsy or
            non-positive value disables the cap.
        stale_hours: Forwarded (as int) to the thread-listing endpoint.
            The function default is 20; the CLI passes its own default.

    Returns:
        dict with "scanned", "written", "deleted", "errors", "no_change",
        "eligible" and "capped_remaining". "errors" counts malformed thread
        URLs, fetches that produced no payload even after one retry, and
        snapshot inserts that returned None.
    """
    from twitter_account import resolve_handle as _resolve_twitter_handle

    def _as_int(v):
        # Postgres BIGINTs come back as JSON strings, so both sides of the
        # no-change comparison are coerced through int() (None stays None).
        if v is None:
            return None
        try:
            return int(v)
        except (TypeError, ValueError):
            return None

    def _fetch(api_url):
        # An HttpNotFoundError is a definitive "gone" answer; map it to the
        # same shape fxtwitter uses for a 404 body so the caller has one path.
        try:
            return fetch_json(api_url)
        except HttpNotFoundError:
            return {"code": 404, "tweet": None}

    handle = _resolve_twitter_handle()
    if not handle:
        if not quiet:
            print(" thread_snapshots: no handle configured; skipping", flush=True)
        # Same key set as the normal return so consumers see one shape.
        return {"scanned": 0, "written": 0, "deleted": 0, "errors": 0,
                "no_change": 0, "eligible": 0, "capped_remaining": 0}

    threads = _http_list_twitter_parent_threads(
        our_account=handle, stale_hours=int(stale_hours), max_age_days=30,
    ) or []

    total_eligible = len(threads)
    if max_per_run and max_per_run > 0 and total_eligible > max_per_run:
        # Take the freshest-commented threads first (the active-for-stats
        # endpoint already orders by posted_at DESC). The capped-out
        # remainder will be picked up on the next cron run.
        threads = threads[:max_per_run]

    scanned = written = deleted_count = errors = no_change = 0
    rate_limit_sleep = 1.0  # fxtwitter etiquette: 1 req/sec
    # Twitter URLs come in both x.com/<user>/status/<id> and
    # twitter.com/<user>/status/<id> shapes; fxtwitter accepts either, but
    # the id is needed either way for the thread_external_id column.
    id_re = re.compile(r"/status/(\d+)")
    user_re = re.compile(r"(?:x|twitter)\.com/([^/]+)/status")
    counter_fields = ("views", "likes", "replies", "retweets", "bookmarks")

    for t in threads:
        scanned += 1
        thread_url = t.get("thread_url") or ""
        m_id = id_re.search(thread_url)
        m_user = user_re.search(thread_url)
        if not m_id or not m_user:
            errors += 1
            continue
        tweet_id = m_id.group(1)
        username = m_user.group(1)

        api_url = f"https://api.fxtwitter.com/{username}/status/{tweet_id}"
        data = _fetch(api_url)
        if not data:
            # Single retry, matches refresh_twitter()'s pattern
            time.sleep(2)
            data = _fetch(api_url)
        if not data:
            # Still no payload: this is a fetch failure, not evidence that
            # the thread is gone. Count it as an error and write nothing so
            # the thread stays eligible on the next run instead of being
            # recorded as deleted.
            errors += 1
            time.sleep(rate_limit_sleep)
            continue

        code = data.get("code", 0)
        tweet = data.get("tweet")

        if code == 404 or tweet is None:
            # Parent thread is deleted/suspended/blocked. Record the fact
            # (so the curve has a terminal point) but don't double-poll
            # next cycle — the server-side staleness gate will see the
            # row and skip.
            _http_insert_thread_snapshot(
                "twitter", thread_url,
                thread_external_id=tweet_id,
                is_deleted=True,
                error=f"fxtwitter_code_{code}",
            )
            deleted_count += 1
            time.sleep(rate_limit_sleep)
            continue

        # Missing or zero counters are stored as None.
        current = {name: (tweet.get(name) or None) for name in counter_fields}
        # fxtwitter exposes quotes on some tweets and not others; coerce.
        quotes = tweet.get("quotes")
        if quotes is not None:
            try:
                quotes = int(quotes)
            except (TypeError, ValueError):
                quotes = None
        author = (tweet.get("author") or {}).get("screen_name") or t.get("thread_author_handle")

        # Cheap no-change bookkeeping: if every counter matches the
        # previous snapshot, still insert a row so the curve has a
        # capture point at this timestamp (the dashboard surfaces the
        # frequency of polls as a freshness signal), but increment the
        # no_change counter so the stats summary makes the cost clear.
        if t.get("last_captured_at") is not None and all(
            _as_int(t.get(f"last_{name}")) == _as_int(current[name])
            for name in counter_fields
        ):
            no_change += 1

        snap_id = _http_insert_thread_snapshot(
            "twitter", thread_url,
            thread_external_id=tweet_id,
            thread_author_handle=author,
            views=current["views"], likes=current["likes"],
            replies=current["replies"], retweets=current["retweets"],
            bookmarks=current["bookmarks"], quotes=quotes,
        )
        if snap_id is None:
            errors += 1
        else:
            written += 1

        time.sleep(rate_limit_sleep)

    capped_remaining = max(0, total_eligible - scanned)
    if not quiet:
        cap_note = f", {capped_remaining} capped" if capped_remaining else ""
        print(f" thread_snapshots: {scanned} scanned, {written} written, "
              f"{deleted_count} deleted, {errors} errors, "
              f"{no_change} unchanged{cap_note}", flush=True)
        print("STATS_JSON: " + json.dumps({
            "platform": "twitter", "kind": "thread_snapshots",
            "scanned": scanned, "written": written, "deleted": deleted_count,
            "errors": errors, "unchanged": no_change,
            "capped_remaining": capped_remaining,
        }), flush=True)
    return {"scanned": scanned, "written": written, "deleted": deleted_count,
            "errors": errors, "no_change": no_change,
            "eligible": total_eligible, "capped_remaining": capped_remaining}
2105
+
2106
+
2107
def refresh_twitter_replies(db, quiet=False):
    """Refresh likes, reply count and views for our reply tweets stored in
    `replies`, using one fxtwitter lookup per reply tweet ID.

    Multi-account safety: the read is scoped server-side to this caller's
    install_id (via X-Installation auth), so two machines refreshing in
    parallel don't both poll the same set of reply tweets. Historical NULL-
    install_id rows are claimed by the primary local install per the
    backfill in 2026-05-19 — see active-for-stats/route.ts for the WHERE
    detail.

    `db` is accepted for orchestrator signature compatibility but the
    function makes no direct SQL calls — every read/write is HTTP.

    Returns a dict with "total" (rows past the freshness gate), "updated",
    "errors" and "skipped_fresh".
    """
    # Tiered freshness: replies no older than 14 days still accrue
    # likes/views, so they are re-polled once their last refresh is 6h old.
    # Older replies have settled and only refresh every 7 days, which keeps
    # fxtwitter load bounded. Reply age comes from the tweet's snowflake ID,
    # so no extra server field is needed.
    recent_window = timedelta(hours=6)
    settled_window = timedelta(days=7)
    recent_cutoff = timedelta(days=14)
    snowflake_epoch_ms = 1288834974657
    started_at = datetime.now(timezone.utc)

    def _fetch_or_none(endpoint):
        # Same HttpNotFoundError guard as refresh_twitter(): one corrupted
        # reply URL must not crash the whole pipeline.
        try:
            return fetch_json(endpoint)
        except HttpNotFoundError:
            return None

    candidates = _http_list_twitter_replies_to_refresh()

    checked = refreshed = failed = fresh = 0
    for reply in candidates:
        reply_id = reply.get("id")
        reply_url = reply.get("our_reply_url") or ""

        # engagement_updated_at arrives as ISO-8601 over JSON; any other
        # value is passed through untouched.
        last_refresh = reply.get("engagement_updated_at")
        if isinstance(last_refresh, str) and last_refresh:
            try:
                last_refresh = datetime.fromisoformat(
                    last_refresh.replace("Z", "+00:00"))
            except ValueError:
                last_refresh = None

        # One match serves both the age estimate and the API lookup below.
        id_match = re.search(r'/status/(\d+)', reply_url)

        window = settled_window
        if id_match:
            try:
                born_ms = (int(id_match.group(1)) >> 22) + snowflake_epoch_ms
                born_at = datetime.fromtimestamp(born_ms / 1000.0, timezone.utc)
                if started_at - born_at <= recent_cutoff:
                    window = recent_window
            except (ValueError, OverflowError, OSError):
                pass

        if last_refresh:
            if last_refresh.tzinfo is None:
                last_refresh = last_refresh.replace(tzinfo=timezone.utc)
            if started_at - last_refresh < window:
                fresh += 1
                continue

        checked += 1
        if not id_match:
            failed += 1
            continue
        tweet_id = id_match.group(1)
        user_match = re.search(r'(?:x|twitter)\.com/([^/]+)/status', reply_url)
        username = 'i' if not user_match else user_match.group(1)

        endpoint = f"https://api.fxtwitter.com/{username}/status/{tweet_id}"
        payload = _fetch_or_none(endpoint)
        if not payload:
            # Single retry after a short pause.
            time.sleep(2)
            payload = _fetch_or_none(endpoint)
        if not payload:
            failed += 1
            continue
        if payload.get("code") == 404 or payload.get("tweet") is None:
            failed += 1
            continue

        tweet = payload["tweet"]
        view_count = int(tweet.get("views") or 0)
        like_count = int(tweet.get("likes") or 0)
        reply_count = int(tweet.get("replies") or 0)

        _http_patch_reply(reply_id, {
            "upvotes": like_count,
            "comments_count": reply_count,
            "views": view_count,
            "stamp_engagement_now": True,
        })
        refreshed += 1

        # fxtwitter pacing — same 1s as posts
        time.sleep(1)
        if checked % 50 == 0:
            progress.tick("twitter_replies", checked, len(candidates) - fresh,
                          updated=refreshed, errors=failed)

    progress.done("twitter_replies", checked, updated=refreshed, errors=failed)
    if not quiet:
        print(f" twitter replies: {checked} checked, {refreshed} updated, "
              f"{failed} errors, {fresh} fresh", flush=True)
    return {"total": checked, "updated": refreshed, "errors": failed,
            "skipped_fresh": fresh}
2220
+
2221
+
2222
def refresh_github_replies(db, quiet=False, limit=None):
    """Refresh the reaction count for our GitHub comments stored in `replies`.

    Each comment is looked up with one `gh api` call. GitHub has no view
    counter, so views stays 0. comments_count is left at 0 (replies-on-replies
    are rare in our flows and would add a per-issue scan we don't need today).

    `limit`, when truthy, truncates the candidate list before the freshness
    gate is applied. Returns a dict with "total", "updated", "errors" and
    "skipped_fresh".
    """
    import subprocess

    pending = _http_list_github_replies_to_refresh()
    if limit:
        pending = pending[:int(limit)]

    freshness = timedelta(days=3)
    started_at = datetime.now(timezone.utc)
    # Captures owner, repo and the numeric issue-comment id.
    issue_comment_url = re.compile(
        r"https?://github\.com/([^/]+)/([^/]+)/(?:issues|pull)/\d+#issuecomment-(\d+)"
    )

    checked = refreshed = failed = fresh = 0
    for reply in pending:
        reply_id = reply.get("id")
        reply_url = reply.get("our_reply_url") or ""

        # Rows refreshed within the last 3 days are skipped; naive
        # timestamps are treated as UTC.
        last_refresh = _parse_dt(reply.get("engagement_updated_at"))
        if last_refresh:
            if last_refresh.tzinfo is None:
                last_refresh = last_refresh.replace(tzinfo=timezone.utc)
            if started_at - last_refresh < freshness:
                fresh += 1
                continue

        checked += 1
        parsed = issue_comment_url.match(reply_url or "")
        if not parsed:
            failed += 1
            continue
        owner, repo, comment_id = parsed.groups()
        gh_argv = ["gh", "api", f"repos/{owner}/{repo}/issues/comments/{comment_id}"]

        try:
            proc = subprocess.run(
                gh_argv, capture_output=True, text=True, timeout=30,
            )
        except Exception:
            failed += 1
            continue

        if proc.returncode != 0:
            combined = (proc.stderr or "") + (proc.stdout or "")
            if "rate limit" in combined.lower():
                if not quiet:
                    print(f" github replies: rate-limited at {checked}, sleeping 60s",
                          flush=True)
                # Back off before the next row; this row is still counted
                # as an error and is not retried in this run.
                time.sleep(60)
            failed += 1
            continue

        try:
            body = json.loads(proc.stdout)
        except Exception:
            failed += 1
            continue

        reaction_total = int((body.get("reactions") or {}).get("total_count") or 0)
        _http_patch_reply(reply_id, {"upvotes": reaction_total,
                                     "stamp_engagement_now": True})
        refreshed += 1
        time.sleep(0.1)
        if checked % 100 == 0:
            progress.tick("github_replies", checked, len(pending) - fresh,
                          updated=refreshed, errors=failed)

    progress.done("github_replies", checked, updated=refreshed, errors=failed)
    if not quiet:
        print(f" github replies: {checked} checked, {refreshed} updated, "
              f"{failed} errors, {fresh} fresh", flush=True)
    return {"total": checked, "updated": refreshed, "errors": failed,
            "skipped_fresh": fresh}
2299
+
2300
+
2301
def get_aggregate_totals(db):
    """Get aggregate stats across all platforms via /api/v1/posts/totals.

    `db` is ignored (kept in signature for back-compat). The HTTP endpoint
    matches the previous SQL: SUM(views), SUM(upvotes) (NOT net of self-
    upvote here, unlike scrape_reddit_views's headline), SUM(comments_count),
    COUNT(*), MIN(posted_at), with platform NOT IN ('github_issues').

    NOTE: the previous SQL did NOT discount the reddit self-upvote (only
    scrape_reddit_views does that). To preserve the legacy dashboard number,
    we ask the totals endpoint with exclude_platforms=github_issues only and
    accept the raw `total_upvotes` (which the server already strips via the
    reddit/moltbook self-upvote logic). The dashboards are tolerant of either
    convention; if a stricter raw-sum is ever needed, add an
    `include_self_upvotes` flag to the route.

    Returns:
        dict with "total_views", "total_upvotes", "total_comments",
        "total_posts", "days_active", "views_per_day" and "first_post".
        "days_active" is 0 when there is no parseable first-post timestamp
        and at least 1 otherwise; "first_post" is the parsed timestamp as a
        string, or None.
    """
    from datetime import datetime, timezone
    resp = api_get(
        "/api/v1/posts/totals",
        query={"status": "active", "exclude_platforms": "github_issues"},
    )
    t = (resp or {}).get("data") or {}

    total_views = int(t.get("total_views") or 0)
    total_upvotes = int(t.get("total_upvotes") or 0)
    total_comments = int(t.get("total_comments") or 0)
    total_posts = int(t.get("total_posts") or 0)

    first_post_iso = t.get("first_post_at")
    first_post = None
    if first_post_iso:
        try:
            first_post = datetime.fromisoformat(str(first_post_iso).replace("Z", "+00:00"))
        except ValueError:
            # Unparseable timestamp: report totals without an age.
            first_post = None

    days = 0
    if first_post:
        # A timestamp without an offset is treated as UTC, consistent with
        # how the reply refreshers handle naive engagement timestamps.
        # Measuring it against the local wall clock would skew the age by
        # the machine's UTC offset.
        anchor = first_post if first_post.tzinfo else first_post.replace(tzinfo=timezone.utc)
        days = max((datetime.now(timezone.utc) - anchor).days, 1)

    return {
        "total_views": total_views,
        "total_upvotes": total_upvotes,
        "total_comments": total_comments,
        "total_posts": total_posts,
        "days_active": days,
        "views_per_day": round(total_views / days) if days else 0,
        "first_post": str(first_post) if first_post else None,
    }
2349
+
2350
+
2351
def print_aggregate_totals(totals):
    """Print the aggregate totals: a header, a one-line human-readable
    summary, then a machine-readable `STATS_JSON:` line with the same values.

    `totals` must provide days_active, total_posts, total_views,
    total_upvotes, total_comments and views_per_day.
    """
    print(f"\n--- Totals ({totals['days_active']} days) ---")

    segments = [
        f"Posts: {totals['total_posts']}",
        f"Views: {totals['total_views']:,}",
        f"Upvotes: {totals['total_upvotes']:,}",
        f"Comments: {totals['total_comments']:,}",
        f"Views/day: {totals['views_per_day']:,}",
    ]
    print(" | ".join(segments))

    # Key order is part of the emitted line, so fields are added in a fixed
    # sequence after the two identifying keys.
    payload = {"platform": "all", "kind": "aggregate_totals"}
    for field in ("days_active", "total_posts", "total_views",
                  "total_upvotes", "total_comments", "views_per_day"):
        payload[field] = totals[field]
    print("STATS_JSON: " + json.dumps(payload))
2368
+
2369
+
2370
+ def main():
2371
+ parser = argparse.ArgumentParser(description="Update engagement stats for social posts")
2372
+ parser.add_argument("--quiet", action="store_true", help="Minimal output")
2373
+ parser.add_argument("--json", action="store_true", help="Output as JSON")
2374
+ parser.add_argument("--twitter-only", action="store_true", help="Only update Twitter stats")
2375
+ parser.add_argument("--twitter-audit", action="store_true", help="Audit all Twitter posts (check deleted + update stats)")
2376
+ parser.add_argument("--reddit-only", action="store_true", help="Only update Reddit stats")
2377
+ parser.add_argument("--reddit-resurrect", action="store_true", help="Re-check Reddit posts marked deleted/removed in last N days and flip live ones back to active")
2378
+ parser.add_argument("--resurrect-days", type=int, default=60, help="Lookback window for --reddit-resurrect (default 60)")
2379
+ parser.add_argument("--moltbook-only", action="store_true", help="Only update Moltbook stats")
2380
+ parser.add_argument("--github-only", action="store_true", help="Only update GitHub stats")
2381
+ parser.add_argument("--github-limit", type=int, default=None, help="Limit github backfill to N posts (for smoke tests)")
2382
+ parser.add_argument("--skip-replies", action="store_true",
2383
+ help="Skip per-reply stat refresh (only update posts)")
2384
+ parser.add_argument("--replies-only", action="store_true",
2385
+ help="Only refresh per-reply stats; skip posts entirely")
2386
+ parser.add_argument("--reply-summary", default=None,
2387
+ help="Write a small JSON file with per-platform reply update "
2388
+ "counts ({reddit, twitter, github}) so the calling shell "
2389
+ "can pass them to log_run.py for the dashboard.")
2390
+ parser.add_argument("--twitter-threads-only", action="store_true",
2391
+ help="Only refresh parent-thread snapshots (refresh_twitter_threads); "
2392
+ "skip posts + replies entirely. Useful for isolated testing.")
2393
+ parser.add_argument("--skip-thread-snapshots", action="store_true",
2394
+ help="Skip the parent-thread snapshot refresh that piggybacks on "
2395
+ "--twitter-only and --twitter-audit. Use when you only want "
2396
+ "the post-engagement refresh and not the parent-thread curve.")
2397
+ parser.add_argument("--twitter-threads-max", type=int, default=1000,
2398
+ help="Cap the number of parent threads polled per run (default 1000). "
2399
+ "fxtwitter is paced at 1 req/sec so 1000 threads ~= 16.7 min. "
2400
+ "0 means unlimited.")
2401
+ parser.add_argument("--twitter-threads-stale-hours", type=int, default=5,
2402
+ help="Skip threads whose latest snapshot is younger than this many "
2403
+ "hours (default 5, matching the active-post and top-reply refresh "
2404
+ "cadence so the dashboard's parent-thread column stays as fresh as "
2405
+ "our own reply). The per-run cap (--twitter-threads-max) keeps "
2406
+ "fxtwitter load bounded and prioritises the most recently-commented "
2407
+ "threads. Set higher to save fxtwitter quota at the cost of staleness.")
2408
+ parser.add_argument("--stats-summary", default=None,
2409
+ help="Write a small JSON file with per-platform stats refresh "
2410
+ "counts ({platform: {refreshed, removed}}) so stats.sh "
2411
+ "can aggregate refreshed/removed pills for the dashboard. "
2412
+ "`refreshed` rolls up posts.updated + replies.updated; "
2413
+ "`removed` rolls up posts.removed + posts.deleted "
2414
+ "(+ posts.suspended for twitter).")
2415
+ args = parser.parse_args()
2416
+
2417
+ config = load_config()
2418
+ reddit_username = config.get("accounts", {}).get("reddit", {}).get("username", "")
2419
+ user_agent = f"social-autoposter/1.0 (u/{reddit_username})" if reddit_username else "social-autoposter/1.0"
2420
+
2421
+ load_env()
2422
+ # Fully HTTP-migrated: every refresh_* branch (reddit, twitter, github,
2423
+ # moltbook, and their reply passes) reads and writes through s4l.ai
2424
+ # /api/v1/* endpoints. No DATABASE_URL is required on any machine. `db` is
2425
+ # kept as None and passed through for signature compatibility only; no
2426
+ # function dereferences it.
2427
+ db = None
2428
+
2429
+ reddit_stats = None
2430
+ reddit_resurrect_stats = None
2431
+ moltbook_stats = None
2432
+ twitter_stats = None
2433
+ twitter_thread_stats = None
2434
+ github_stats = None
2435
+ reddit_reply_stats = None
2436
+ twitter_reply_stats = None
2437
+ github_reply_stats = None
2438
+
2439
+ # Each platform's reply refresh piggybacks on that platform's stat pass
2440
+ # (no new launchd job, no shell-script edits). --skip-replies bypasses,
2441
+ # --replies-only runs only the reply pass for that platform's scope.
2442
+ do_replies = not args.skip_replies
2443
+ # Same pattern for parent-thread snapshots: piggyback on twitter passes
2444
+ # unless explicitly skipped. --twitter-threads-only short-circuits to
2445
+ # only the snapshot pass (no posts, no replies).
2446
+ do_thread_snapshots = not args.skip_thread_snapshots
2447
+
2448
+ if args.twitter_threads_only:
2449
+ twitter_thread_stats = refresh_twitter_threads(
2450
+ db, config=config, quiet=args.quiet,
2451
+ max_per_run=args.twitter_threads_max,
2452
+ stale_hours=args.twitter_threads_stale_hours,
2453
+ )
2454
+ elif args.replies_only:
2455
+ if args.twitter_only or args.twitter_audit:
2456
+ twitter_reply_stats = refresh_twitter_replies(db, quiet=args.quiet)
2457
+ elif args.reddit_only:
2458
+ reddit_reply_stats = refresh_reddit_replies(db, user_agent, quiet=args.quiet)
2459
+ elif args.github_only:
2460
+ github_reply_stats = refresh_github_replies(db, quiet=args.quiet, limit=args.github_limit)
2461
+ else:
2462
+ reddit_reply_stats = refresh_reddit_replies(db, user_agent, quiet=args.quiet)
2463
+ twitter_reply_stats = refresh_twitter_replies(db, quiet=args.quiet)
2464
+ github_reply_stats = refresh_github_replies(db, quiet=args.quiet)
2465
+ elif args.twitter_audit:
2466
+ twitter_stats = refresh_twitter(db, config=config, quiet=args.quiet, audit_mode=True)
2467
+ if do_replies:
2468
+ twitter_reply_stats = refresh_twitter_replies(db, quiet=args.quiet)
2469
+ if do_thread_snapshots:
2470
+ twitter_thread_stats = refresh_twitter_threads(
2471
+ db, config=config, quiet=args.quiet,
2472
+ max_per_run=args.twitter_threads_max,
2473
+ stale_hours=args.twitter_threads_stale_hours,
2474
+ )
2475
+ elif args.twitter_only:
2476
+ twitter_stats = refresh_twitter(db, config=config, quiet=args.quiet)
2477
+ if do_replies:
2478
+ twitter_reply_stats = refresh_twitter_replies(db, quiet=args.quiet)
2479
+ if do_thread_snapshots:
2480
+ twitter_thread_stats = refresh_twitter_threads(
2481
+ db, config=config, quiet=args.quiet,
2482
+ max_per_run=args.twitter_threads_max,
2483
+ stale_hours=args.twitter_threads_stale_hours,
2484
+ )
2485
+ elif args.reddit_resurrect:
2486
+ reddit_resurrect_stats = refresh_reddit_resurrect(db, user_agent, config=config, quiet=args.quiet, days=args.resurrect_days)
2487
+ elif args.reddit_only:
2488
+ reddit_stats = refresh_reddit(db, user_agent, config=config, quiet=args.quiet)
2489
+ if do_replies:
2490
+ reddit_reply_stats = refresh_reddit_replies(db, user_agent, quiet=args.quiet)
2491
+ elif args.moltbook_only:
2492
+ moltbook_stats = refresh_moltbook(db, os.environ.get("MOLTBOOK_API_KEY", ""), quiet=args.quiet)
2493
+ elif args.github_only:
2494
+ github_stats = refresh_github(db, quiet=args.quiet, limit=args.github_limit)
2495
+ if do_replies:
2496
+ github_reply_stats = refresh_github_replies(db, quiet=args.quiet, limit=args.github_limit)
2497
+ else:
2498
+ reddit_stats = refresh_reddit(db, user_agent, config=config, quiet=args.quiet)
2499
+ moltbook_stats = refresh_moltbook(db, os.environ.get("MOLTBOOK_API_KEY", ""), quiet=args.quiet)
2500
+ twitter_stats = refresh_twitter(db, config=config, quiet=args.quiet)
2501
+ github_stats = refresh_github(db, quiet=args.quiet)
2502
+ if do_replies:
2503
+ reddit_reply_stats = refresh_reddit_replies(db, user_agent, quiet=args.quiet)
2504
+ twitter_reply_stats = refresh_twitter_replies(db, quiet=args.quiet)
2505
+ github_reply_stats = refresh_github_replies(db, quiet=args.quiet)
2506
+ if do_thread_snapshots:
2507
+ twitter_thread_stats = refresh_twitter_threads(
2508
+ db, config=config, quiet=args.quiet,
2509
+ max_per_run=args.twitter_threads_max,
2510
+ stale_hours=args.twitter_threads_stale_hours,
2511
+ )
2512
+
2513
+ # Gather aggregate totals across all platforms (HTTP-only, db ignored).
2514
+ totals = get_aggregate_totals(db)
2515
+
2516
+ output = {"totals": totals}
2517
+ if reddit_stats is not None:
2518
+ output["reddit"] = reddit_stats
2519
+ if reddit_resurrect_stats is not None:
2520
+ output["reddit_resurrect"] = reddit_resurrect_stats
2521
+ if moltbook_stats is not None:
2522
+ output["moltbook"] = moltbook_stats
2523
+ if twitter_stats is not None:
2524
+ output["twitter"] = twitter_stats
2525
+ if github_stats is not None:
2526
+ output["github"] = github_stats
2527
+ if reddit_reply_stats is not None:
2528
+ output["reddit_replies"] = reddit_reply_stats
2529
+ if twitter_reply_stats is not None:
2530
+ output["twitter_replies"] = twitter_reply_stats
2531
+ if twitter_thread_stats is not None:
2532
+ output["twitter_threads"] = twitter_thread_stats
2533
+ if github_reply_stats is not None:
2534
+ output["github_replies"] = github_reply_stats
2535
+
2536
+ # Sidecar JSON for the dashboard Jobs row. Always written when the flag is
2537
+ # set, even if a platform was skipped (count = 0). The shell consumer then
2538
+ # forwards the right count to log_run.py per platform.
2539
+ if args.reply_summary:
2540
+ try:
2541
+ summary = {
2542
+ "reddit": (reddit_reply_stats or {}).get("updated", 0),
2543
+ "twitter": (twitter_reply_stats or {}).get("updated", 0),
2544
+ "github": (github_reply_stats or {}).get("updated", 0),
2545
+ }
2546
+ with open(args.reply_summary, "w") as f:
2547
+ json.dump(summary, f)
2548
+ except Exception as e:
2549
+ print(f"WARN: failed to write reply summary {args.reply_summary}: {e}",
2550
+ file=sys.stderr)
2551
+
2552
+ # Richer sidecar JSON: per-platform refreshed/removed totals so stats.sh
2553
+ # can render real "refreshed N, removed N" pills instead of the legacy
2554
+ # posted=<active count> mush.
2555
+ if args.stats_summary:
2556
+ try:
2557
+ def pkey(post_stats, reply_stats, removed_keys=("removed", "deleted")):
2558
+ ps = post_stats or {}
2559
+ rs = reply_stats or {}
2560
+ refreshed = int(ps.get("updated", 0) or 0) + int(rs.get("updated", 0) or 0)
2561
+ removed = sum(int(ps.get(k, 0) or 0) for k in removed_keys)
2562
+ return {"refreshed": refreshed, "removed": removed}
2563
+ stats_summary = {
2564
+ "reddit": pkey(reddit_stats, reddit_reply_stats),
2565
+ "twitter": pkey(twitter_stats, twitter_reply_stats,
2566
+ removed_keys=("deleted", "suspended")),
2567
+ "moltbook": pkey(moltbook_stats, None),
2568
+ "github": pkey(github_stats, github_reply_stats),
2569
+ }
2570
+ with open(args.stats_summary, "w") as f:
2571
+ json.dump(stats_summary, f)
2572
+ except Exception as e:
2573
+ print(f"WARN: failed to write stats summary {args.stats_summary}: {e}",
2574
+ file=sys.stderr)
2575
+
2576
+ if args.json:
2577
+ print(json.dumps(output, indent=2))
2578
+ else:
2579
+ if reddit_stats is not None:
2580
+ r = reddit_stats
2581
+ err_break = (
2582
+ f" [404={r.get('errors_404', 0)} "
2583
+ f"rl={r.get('errors_rate_limited', 0)} "
2584
+ f"empty={r.get('errors_empty', 0)} "
2585
+ f"other={r.get('errors_other', 0)}]"
2586
+ )
2587
+ # 2026-05-18 relabel pass. The structured stdout line now exposes
2588
+ # five distinct counters that stats.sh greps into log_run.py:
2589
+ # total -> "scanned" pill (all rows considered this run)
2590
+ # skipped -> "skipped" pill = stable-cooldown + fresh-from-Step1
2591
+ # (Step 1 already covered them; we'd just waste an API hit)
2592
+ # checked -> "checked" pill = rows we actually hit the Reddit JSON
2593
+ # API for this run (= polled + errored, excludes both
2594
+ # skip classes). Previously this was `total - skipped`
2595
+ # which silently inflated when skipped_fresh > 0.
2596
+ # changed -> "changed" pill = subset of checked where upvotes or
2597
+ # comments_count moved. Used to live under the
2598
+ # misleading "updated" label.
2599
+ # errors -> rolls into the "failed" pill on the dashboard.
2600
+ skipped_total = r.get('skipped', 0) + r.get('skipped_fresh', 0)
2601
+ checked = r['total'] - skipped_total
2602
+ print(f"\nReddit: {r['total']} total, {skipped_total} skipped, "
2603
+ f"{checked} checked, "
2604
+ f"{r.get('changed', r.get('updated', 0))} changed, "
2605
+ f"{r['deleted']} deleted, {r['removed']} removed, {r['errors']} errors" + err_break)
2606
+ print("STATS_JSON: " + json.dumps({
2607
+ "platform": "reddit", "kind": "posts",
2608
+ "total": r['total'], "skipped": skipped_total, "checked": checked,
2609
+ "changed": r.get('changed', r.get('updated', 0)),
2610
+ "deleted": r['deleted'], "removed": r['removed'], "errors": r['errors'],
2611
+ }))
2612
+ if not args.quiet and r["results"]:
2613
+ print(f"{'ID':>4} {'Score':>5} {'Thread':>7} {'Comments':>8} Title")
2614
+ for row in sorted(r["results"], key=lambda x: x["score"], reverse=True):
2615
+ print(f"{row['id']:>4} {row['score']:>5} {row['thread_score']:>7} "
2616
+ f"{row['thread_comments']:>8} {row['title']}")
2617
+
2618
+ if reddit_resurrect_stats is not None:
2619
+ r = reddit_resurrect_stats
2620
+ print(f"\nReddit resurrect ({args.resurrect_days}d): {r['total']} rechecked, "
2621
+ f"{r['resurrected']} resurrected, {r['still_dead']} still dead, "
2622
+ f"{r['errors']} errors (rl={r.get('errors_rate_limited',0)} "
2623
+ f"empty={r.get('errors_empty',0)} malformed={r.get('errors_malformed',0)} "
2624
+ f"other={r.get('errors_other',0)})")
2625
+
2626
+ # `skipped: True` is the no-API-key sentinel (don't print); any
2627
+ # integer value means we ran and counted some skipped rows, in which
2628
+ # case we DO want the summary line (the dashboard needs it).
2629
+ if moltbook_stats is not None and moltbook_stats.get("skipped") is not True:
2630
+ m = moltbook_stats
2631
+ print(f"\nMoltbook: {m['total']} checked, {m['updated']} updated, "
2632
+ f"{m['deleted']} deleted, {m['errors']} errors")
2633
+ print("STATS_JSON: " + json.dumps({
2634
+ "platform": "moltbook", "kind": "posts",
2635
+ "total": m['total'], "skipped": 0, "checked": m['total'],
2636
+ "changed": m['updated'],
2637
+ "deleted": m['deleted'], "removed": 0, "errors": m['errors'],
2638
+ }))
2639
+
2640
+ if twitter_stats is not None:
2641
+ t = twitter_stats
2642
+ # 2026-05-18 relabel pass — same shape as the Reddit line above.
2643
+ # `skipped` now combines stable-cooldown + skipped_fresh so the
2644
+ # `checked` count reflects rows we actually polled the fxtwitter
2645
+ # API for, not "everything minus stable skips" (which silently
2646
+ # included fresh rows). `changed` is the metric-moved subset.
2647
+ t_skipped_total = t.get('skipped', 0) + t.get('skipped_fresh', 0)
2648
+ t_checked = t['total'] - t_skipped_total
2649
+ print(f"\nTwitter: {t['total']} total, {t_skipped_total} skipped, "
2650
+ f"{t_checked} checked, "
2651
+ f"{t.get('changed', t.get('updated', 0))} changed, "
2652
+ f"{t['deleted']} deleted, {t['errors']} errors")
2653
+ print("STATS_JSON: " + json.dumps({
2654
+ "platform": "twitter", "kind": "posts",
2655
+ "total": t['total'], "skipped": t_skipped_total, "checked": t_checked,
2656
+ "changed": t.get('changed', t.get('updated', 0)),
2657
+ "deleted": t['deleted'], "removed": 0, "errors": t['errors'],
2658
+ }))
2659
+ if not args.quiet and t["results"]:
2660
+ top = sorted(t["results"], key=lambda x: x.get("views", 0), reverse=True)[:30]
2661
+ print(f"{'ID':>4} {'Views':>7} {'Likes':>5} {'Replies':>7} {'RTs':>4}")
2662
+ for row in top:
2663
+ print(f"{row['id']:>4} {row.get('views',0):>7} {row.get('likes',0):>5} "
2664
+ f"{row.get('replies',0):>7} {row.get('retweets',0):>4}")
2665
+
2666
+ if github_stats is not None:
2667
+ g = github_stats
2668
+ print(f"\nGitHub: {g['total']} checked, {g['updated']} updated, "
2669
+ f"{g['deleted']} deleted, {g['errors']} errors")
2670
+ print("STATS_JSON: " + json.dumps({
2671
+ "platform": "github", "kind": "posts",
2672
+ "total": g['total'], "skipped": 0, "checked": g['total'],
2673
+ "changed": g['updated'],
2674
+ "deleted": g['deleted'], "removed": 0, "errors": g['errors'],
2675
+ }))
2676
+ if not args.quiet and g["results"]:
2677
+ top = sorted(g["results"],
2678
+ key=lambda x: (x.get("reactions", 0) + x.get("replies", 0)),
2679
+ reverse=True)[:20]
2680
+ print(f"{'ID':>5} {'React':>5} {'Reply':>5} URL")
2681
+ for row in top:
2682
+ print(f"{row['id']:>5} {row['reactions']:>5} {row['replies']:>5} {row['url']}")
2683
+
2684
+ for label, stats in (("Reddit replies", reddit_reply_stats),
2685
+ ("Twitter replies", twitter_reply_stats),
2686
+ ("GitHub replies", github_reply_stats)):
2687
+ if stats is None:
2688
+ continue
2689
+ print(f"\n{label}: {stats['total']} checked, {stats['updated']} updated, "
2690
+ f"{stats['errors']} errors, {stats.get('skipped_fresh', 0)} fresh")
2691
+ print("STATS_JSON: " + json.dumps({
2692
+ "platform": label.split()[0].lower(), "kind": "replies",
2693
+ "total": stats['total'], "checked": stats['total'],
2694
+ "updated": stats['updated'], "errors": stats['errors'],
2695
+ "fresh": stats.get('skipped_fresh', 0),
2696
+ }))
2697
+
2698
+ print_aggregate_totals(totals)
2699
+
2700
+
2701
+ if __name__ == "__main__":  # script entry point: call main() only when this file is executed directly, not when it is imported
2702
+ main()