@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,718 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ score_twitter_candidates.py
4
+
5
+ Reads raw tweet data (JSON from stdin or file), calculates virality scores,
6
+ and upserts into the twitter_candidates table.
7
+
8
+ Also expires stale pending candidates by flipping status to 'expired'.
9
+ NO PRUNING: rows are kept forever for analytics (skip-reason audit, engagement
10
+ dynamics, project routing review). Per user instruction 2026-05-08, do not
11
+ re-introduce DELETE-by-age here under any retention window.
12
+
13
+ Can be called standalone or piped from the scanner:
14
+ echo '[{...}]' | python3 scripts/score_twitter_candidates.py
15
+ python3 scripts/score_twitter_candidates.py --file /tmp/tweets.json
16
+ python3 scripts/score_twitter_candidates.py --expire-only
17
+ """
18
+
19
+ import argparse
20
+ import json
21
+ import math
22
+ import os
23
+ import re
24
+ import sys
25
+ from datetime import datetime, timezone, timedelta
26
+
27
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
28
+ from http_api import api_get, api_post # noqa: E402
29
+
30
+ # Best-effort dedicated logger for the follow-gate -> skill/logs/follow-gate.log.
31
+ # Guarded so a missing/older helper file can never break scoring (fail-open).
32
+ try:
33
+ import follow_gate_log as _fgl # noqa: E402
34
+ except Exception:
35
+ _fgl = None
36
+ from twitter_account import resolve_handle as _resolve_twitter_handle # noqa: E402
37
+ from project_topics import topics_for_project # noqa: E402
38
+
39
+
40
+ # Freshness window (in hours) for the expire-stale gate that flips stale
41
+ # pending rows to status='expired'. Sourced from the FRESHNESS_HOURS env the
42
+ # cycle exports (run-twitter-cycle.sh) so the expiry ceiling is configured in
43
+ # ONE place. Falls back to 18 when unset (e.g. ad-hoc / --expire-only runs) to
44
+ # preserve the historical default. NOTE: the gate is on discovered_at
45
+ # (discovery age), not tweet_posted_at; for logic D (≤1h discovery freshness)
46
+ # the two are within ~1h of each other.
47
+ EXPIRE_FRESHNESS_HOURS = int(os.environ.get("FRESHNESS_HOURS") or "18")
48
+
49
+
50
+ # Real Twitter snowflake IDs are 18-19 digit numbers with full entropy in the
51
+ # low bits (sequence counter + worker/datacenter ID = bottom 22 bits ≈ bottom
52
+ # 7 decimal digits). An ID ending in 6+ zeros is statistically impossible
53
+ # unless the sequence counter, worker ID, and datacenter ID were all exactly 0
54
+ # at submission AND the timestamp aligned with a power-of-two ms boundary —
55
+ # combined probability ≈ 0. Observed 2026-05-16 (batch twcycle-20260516-080005):
56
+ # the harness scan model fabricates IDs by templating a high-digit prefix and
57
+ # zero-padding, e.g. 2055588000000000000, 2055590000000000000 (sequential by 1).
58
+ # fxtwitter rejects these at T1 ("truncated/invalid status ID and loads no
59
+ # tweet"). Drop them at score time so we don't burn draft tokens or candidate
60
+ # rows on phantom URLs.
61
+ _SNOWFLAKE_OK = re.compile(r"/status/(\d{15,19})(?:[/?#]|$)")
62
+ _TRAILING_ZEROS_FAKE = re.compile(r"0{6,}$")
63
+
64
+
65
+ # Weight on the additive reach-potential term in calculate_virality_score
66
+ # (2026-05-28). Tunable. Larger = a fresh high-follower thread with no
67
+ # engagement yet ranks higher relative to threads with demonstrated velocity.
68
+ # At 0.6, a freshly-posted tweet from a 50k-200k account scores ~4 on reach
69
+ # alone; a 200M account ~5.4; a sub-1k account stays near 0. Set to 0 to fall
70
+ # back to the pure multiplicative (engagement-only) score.
71
+ REACH_POTENTIAL_WEIGHT = 0.6
72
+
73
+
74
def looks_like_fabricated_tweet_url(url: str) -> bool:
    """Report whether a tweet URL carries a fabricated (or unusable) status ID.

    The URL is treated as fabricated when any of the following holds:
      - it is empty, or has no parseable /status/<digits> segment
      - its status ID falls outside the plausible 15-19 digit range
      - its status ID ends in 6 or more zeros (the template signature)
    """
    # An empty URL is rejected without running the pattern at all.
    found = _SNOWFLAKE_OK.search(url) if url else None
    if found is None:
        return True
    status_id = found.group(1)
    # A zero-padded tail is the only remaining disqualifier.
    return _TRAILING_ZEROS_FAKE.search(status_id) is not None
91
+
92
+
93
def calculate_virality_score(tweet):
    """
    Score a tweet's viral potential. Higher = better candidate to reply to.

    Args:
        tweet: dict of tweet metrics. Reads ``likes``, ``retweets``, ``replies``,
            ``bookmarks`` and ``author_followers`` (each defaulting to 0) plus
            ``age_hours`` (defaulting to 1, floored at 0.1). A key that is
            absent or explicitly ``None`` takes its default.

    Returns:
        Tuple ``(score, velocity, rt_ratio)`` rounded to 2, 2 and 3 decimals.

    Multiplicative engagement factors:
        1. Engagement velocity (eng/hour)
        2. Retweet ratio bonus: 1x-2x, capped once retweets are half of engagement
        3. Reply activity bonus: replies / 15, capped at +4x
        4. Discussion bonus: reply-to-like ratio * 10, capped at +1x
        5. Author reach multiplier, tiered by follower count (0.3 to 1.4)
        6. Age penalty: exponential decay with 6h half-life

    An additive reach-potential term (log10 of followers, scaled by the reach
    multiplier, the same age decay and REACH_POTENTIAL_WEIGHT) keeps the
    follower signal alive for tweets that have no engagement yet.
    """

    def _metric(key, default=0):
        # dict.get only applies its default to a MISSING key; a key that is
        # present with a null value would otherwise leak None into the
        # arithmetic below and raise TypeError.
        value = tweet.get(key)
        return default if value is None else value

    likes = _metric("likes")
    retweets = _metric("retweets")
    replies = _metric("replies")
    bookmarks = _metric("bookmarks")
    followers = _metric("author_followers")

    total_eng = likes + retweets + replies + bookmarks

    # Age in hours, floored so a just-posted tweet cannot divide by ~0.
    age_hours = _metric("age_hours", 1)
    if age_hours < 0.1:
        age_hours = 0.1

    # 1. Engagement velocity (most important)
    velocity = total_eng / age_hours

    # 2. Retweet ratio (reshare intent)
    rt_ratio = retweets / total_eng if total_eng > 0 else 0

    # 3. Reply activity bonus (active discussion = more visibility for our reply)
    #    15 replies = +1x, 30 = +2x, 60+ = +4x cap
    reply_bonus = min(replies / 15, 4.0)

    # 4. Discussion quality (reply:like ratio). High ratio = real discussion.
    #    0.05 ratio = +0.5x, 0.1+ = +1.0x cap
    discussion_ratio = replies / likes if likes > 0 else 0
    discussion_bonus = min(discussion_ratio * 10, 1.0)

    # 5. Author reach multiplier
    #    Sweet spot: 5K+ followers. Big names (KentBeck-class) get full credit,
    #    since brand value outweighs the "too competitive" concern.
    if followers < 1000:
        reach_mult = 0.3
    elif followers < 5000:
        reach_mult = 0.6
    elif followers < 50000:
        reach_mult = 1.0
    elif followers < 200000:
        reach_mult = 1.4
    elif followers < 500000:
        reach_mult = 1.3
    else:
        reach_mult = 1.1  # mega accounts still worth it for brand exposure

    # 6. Age decay: half-life of 6 hours (softened from 3h)
    #    3h = 71%, 6h = 50%, 12h = 25%, 18h = 12.5%
    age_decay = math.exp(-0.1155 * age_hours)  # ln(2)/6

    # 7. Retweet ratio bonus
    rt_bonus = 1.0 + min(rt_ratio * 2, 1.0)  # up to 2x for high RT ratio

    # Engagement-driven score (multiplicative). This collapses to 0 for any
    # tweet with zero engagement, because velocity (= total_eng / age) gates the
    # entire product. That is correct for ranking *demonstrated* momentum.
    engagement_score = (
        velocity
        * reach_mult
        * age_decay
        * rt_bonus
        * (1 + reply_bonus)
        * (1 + discussion_bonus)
    )

    # Reach-potential term (ADDITIVE, 2026-05-28). The multiplicative score above
    # throws away the follower signal whenever engagement is 0: a freshly-posted
    # tweet from a 200M-follower account scored identically (0.0) to a 1-follower
    # nobody, because anything * 0 = 0. We ADD (not multiply) a reach term so the
    # follower signal survives a zero-engagement velocity. It decays on the SAME
    # 6h half-life via age_decay, so a stale big-account tweet that STILL has no
    # engagement sinks back toward zero, while a fresh one ranks above a fresh
    # nobody. The term is never negative (followers are clamped to >= 1 before
    # log10), so it only ever raises a score, never removes a candidate.
    # NOTE: the term grows with log10(followers) inside each reach_mult tier but
    # is NOT strictly monotonic across tiers: reach_mult steps down at 200k
    # (1.4 -> 1.3) and 500k (1.3 -> 1.1), so an account just above either
    # boundary scores slightly lower than one just below it.
    reach_potential = (
        math.log10(max(followers, 1))
        * reach_mult
        * age_decay
        * REACH_POTENTIAL_WEIGHT
    )

    score = engagement_score + reach_potential

    return round(score, 2), round(velocity, 2), round(rt_ratio, 3)
179
+
180
+
181
def match_project(tweet_text, search_topic, config):
    """Return the first configured project whose topic matches the tweet.

    Projects are visited in ``config["projects"]`` order. For each one, the
    topics returned by ``topics_for_project(name)`` are compared
    case-insensitively, as plain substrings, against both the search topic and
    the tweet text. The first project with any hit wins; no ranking is done
    between multiple matching projects.

    Args:
        tweet_text: tweet body; ``None`` is treated as empty.
        search_topic: topic the discovering search used; ``None`` is treated
            as empty.
        config: dict whose ``projects`` entry is a list of project dicts, each
            read for its ``name`` key.

    Returns:
        The matching project's name, or ``None`` when nothing matches.
    """
    topic_lower = (search_topic or "").lower()
    text_lower = (tweet_text or "").lower()

    # `or []` also covers a config that stores "projects": null.
    for proj in config.get("projects") or []:
        name = proj.get("name", "")
        for raw_topic in topics_for_project(name):
            # Skip blank / non-string topics: "" is a substring of every
            # string, so a blank topic would claim every tweet for this
            # project, and a non-string would crash on .lower().
            if not isinstance(raw_topic, str) or not raw_topic.strip():
                continue
            # Only lowercase (no strip) so intentionally padded topics keep
            # their original matching behavior.
            needle = raw_topic.lower()
            if needle in topic_lower or needle in text_lower:
                return name

    return None
198
+
199
+
200
+ def upsert_candidates(tweets, config, batch_id=None, attempts_map=None, scored_sidecar=None):
201
+ """Score and upsert tweet candidates into DB.
202
+
203
+ If batch_id is provided, also populates T0 engagement columns and tags
204
+ the row with batch_id so Phase 2 of the cycle can re-poll only this batch.
205
+
206
+ If attempts_map is provided (dict keyed by (query, project) -> attempt_id),
207
+ stamps twitter_candidates.search_attempt_id so dashboard per-query stats
208
+ can attribute each posted candidate to the exact discovering search,
209
+ rather than fanning out (batch_id, project_name) across every query the
210
+ batch ran for that project (2026-05-21 bug fix).
211
+
212
+ If scored_sidecar is provided, writes per-query verdict tallies to that
213
+ JSON path so run-twitter-cycle.sh can build the directional
214
+ TRIED_QUERIES_JSON for the next retry attempt's prompt (2026-05-28
215
+ retry-feedback loop). Shape:
216
+ {query_string: {raw, kept_after_age, kept_after_skip}, ...}
217
+ raw = tweets fed to upsert_candidates from the enrich step
218
+ kept_after_age = tweets surviving the FRESHNESS_HOURS_DISCOVER cap
219
+ kept_after_skip = tweets that made it through to api_post insert
220
+ Never raises if the path isn't writable; the verdict step falls back to
221
+ raw == kept_after_age (no all_aged_out distinction) when the sidecar is
222
+ missing.
223
+
224
+ Migrated 2026-05-18 to call the s4l.ai HTTP API:
225
+ - dedup probe -> GET /api/v1/posts/thread-urls?platform=twitter
226
+ - per-tweet upsert -> POST /api/v1/twitter-candidates
227
+ (route handles the ON CONFLICT + peer-cycle race guard server-side)
228
+ - freshness gate -> POST /api/v1/twitter-candidates/expire-stale
229
+ (default 18h window; never deletes rows — status flip only)
230
+ """
231
+ attempts_map = attempts_map or {}
232
+ # Per-query tally for the scored sidecar. We seed `raw` upfront so a query
233
+ # whose every tweet was dropped (stale, fabricated, ceiling) still shows
234
+ # up with raw>0, kept_after_age=0 -> all_aged_out verdict instead of
235
+ # silently disappearing into the kept_after_skip=0 branch.
236
+ sidecar = {}
237
+ if scored_sidecar:
238
+ for _t in tweets:
239
+ _q = (_t.get("query") or "").strip()
240
+ if not _q:
241
+ continue
242
+ ent = sidecar.setdefault(_q, {"raw": 0, "kept_after_age": 0, "kept_after_skip": 0})
243
+ ent["raw"] += 1
244
+ # Get already-posted thread URLs for dedup. Scope per-account so the mk0r
245
+ # VM running as @matt_diak doesn't skip a tweet that @m13v_ posted on
246
+ # (or vice versa). Falls back to unscoped when the resolver can't pin a
247
+ # handle, which preserves the legacy single-machine behavior.
248
+ _twitter_handle = _resolve_twitter_handle()
249
+ _probe_query = {"platform": "twitter"}
250
+ if _twitter_handle:
251
+ _probe_query["our_account"] = _twitter_handle
252
+ posted_resp = api_get("/api/v1/posts/thread-urls", query=_probe_query)
253
+ posted = set((posted_resp.get("data") or {}).get("thread_urls") or [])
254
+
255
+ # Get already-SKIPPED (tweet_url, project) pairs for the per-project skip
256
+ # gate. Claude explicitly rejected these threads for the matched project in
257
+ # a prior cycle (status='skipped'); since the Phase 2b prompt now reserves
258
+ # 'rejected' for permanent, thread-intrinsic reasons (transient cap / dedup
259
+ # / cooldown deferrals are left pending, never skipped), every skipped row
260
+ # is a genuine rejection safe to suppress from future scans permanently.
261
+ # Per-project so a thread skipped as fazm stays eligible if a later scan
262
+ # matches it to a different project. Fail-open: ok_on_404 + try/except so a
263
+ # missing/unavailable endpoint behaves exactly like the pre-feature cycle
264
+ # (no skip filtering) instead of crashing Phase 1.
265
+ skipped_pairs = set()
266
+ if _twitter_handle:
267
+ try:
268
+ _skip_resp = api_get(
269
+ "/api/v1/twitter-candidates/skipped-urls",
270
+ query={"our_account": _twitter_handle},
271
+ ok_on_404=True,
272
+ )
273
+ if _skip_resp.get("_not_found"):
274
+ # 404: endpoint not deployed yet. Explicit so a 0-pair gate is
275
+ # never mistaken for "loaded the set, nothing matched".
276
+ print(
277
+ f"[skip_gate] fail-open: skipped-urls endpoint 404 "
278
+ f"(not deployed) our_account={_twitter_handle}; "
279
+ f"skip filter inactive this cycle",
280
+ file=sys.stderr,
281
+ flush=True,
282
+ )
283
+ else:
284
+ for _pair in (_skip_resp.get("data") or {}).get("pairs") or []:
285
+ _su = (_pair.get("tweet_url") or "").strip()
286
+ if _su:
287
+ skipped_pairs.add((_su, _pair.get("project")))
288
+ except SystemExit as _skip_err:
289
+ # http_api raises SystemExit on terminal HTTP failure (e.g. a 429
290
+ # rate-limit, which is a 4xx). Fail open: an empty set means the
291
+ # gate is inert this cycle rather than crashing Phase 1. Logged
292
+ # explicitly so an inert gate is distinguishable from a real
293
+ # no-match (both otherwise show "already rejected for project: 0").
294
+ skipped_pairs = set()
295
+ print(
296
+ f"[skip_gate] fail-open: skipped-urls fetch failed "
297
+ f"({_skip_err}); skip filter inactive this cycle",
298
+ file=sys.stderr,
299
+ flush=True,
300
+ )
301
+ # Always emit the loaded size so every cycle self-documents whether the
302
+ # gate had real data (N>0) or fell open (N=0). Pairs with N>0 is the
303
+ # positive proof that the check ran against the live skipped set.
304
+ print(
305
+ f"[skip_gate] loaded {len(skipped_pairs)} skipped (url,project) pairs "
306
+ f"for our_account={_twitter_handle or '(unresolved)'}",
307
+ file=sys.stderr,
308
+ flush=True,
309
+ )
310
+
311
+ # Skip threads whose author is someone we already follow. We don't need to
312
+ # win over accounts already in our network — the comment buys no new reach.
313
+ # The follow list is harvested out-of-band (scripts/harvest_twitter_following.py
314
+ # scrapes x.com/<handle>/following) and stored server-side; we just read the
315
+ # set here, scoped to our posting handle. Fail-open exactly like the skip gate
316
+ # above: a missing endpoint / 429 / unresolved handle leaves the set empty so
317
+ # the cycle behaves exactly as it did before this guardrail (2026-06-03).
318
+ followed_handles = set()
319
+ _follow_source = "unresolved"
320
+ if _twitter_handle:
321
+ try:
322
+ _foll_resp = api_get(
323
+ "/api/v1/followed-accounts",
324
+ query={"platform": "twitter", "our_account": _twitter_handle},
325
+ ok_on_404=True,
326
+ )
327
+ if _foll_resp.get("_not_found"):
328
+ _follow_source = "404"
329
+ print(
330
+ f"[follow_gate] fail-open: followed-accounts endpoint 404 "
331
+ f"(not deployed) our_account={_twitter_handle}; "
332
+ f"follow filter inactive this cycle",
333
+ file=sys.stderr,
334
+ flush=True,
335
+ )
336
+ else:
337
+ _follow_source = "ok"
338
+ for _fh in (_foll_resp.get("data") or {}).get("handles") or []:
339
+ _fhs = (_fh or "").strip().lstrip("@").lower()
340
+ if _fhs:
341
+ followed_handles.add(_fhs)
342
+ except SystemExit as _foll_err:
343
+ _follow_source = "error"
344
+ followed_handles = set()
345
+ print(
346
+ f"[follow_gate] fail-open: followed-accounts fetch failed "
347
+ f"({_foll_err}); follow filter inactive this cycle",
348
+ file=sys.stderr,
349
+ flush=True,
350
+ )
351
+ print(
352
+ f"[follow_gate] loaded {len(followed_handles)} followed handles "
353
+ f"for our_account={_twitter_handle or '(unresolved)'}",
354
+ file=sys.stderr,
355
+ flush=True,
356
+ )
357
+
358
+ inserted = updated = skipped = 0
359
+ skipped_fake_id = 0
360
+ skipped_already_rejected = 0
361
+ skipped_followed_author = 0
362
+
363
+ for tweet in tweets:
364
+ url = (tweet.get("tweet_url") or tweet.get("tweetUrl") or "").strip()
365
+ if not url:
366
+ continue
367
+
368
+ # Reject hallucinated snowflake IDs (see looks_like_fabricated_tweet_url
369
+ # docstring). Counted separately so the failure mode is visible in the
370
+ # pipeline log; rolled into `skipped` total for backwards-compat metrics.
371
+ if looks_like_fabricated_tweet_url(url):
372
+ skipped += 1
373
+ skipped_fake_id += 1
374
+ print(f" Drop fabricated snowflake: {url}", file=sys.stderr)
375
+ continue
376
+
377
+ # Skip if we already posted on this thread
378
+ if url in posted:
379
+ skipped += 1
380
+ continue
381
+
382
+ # Skip threads authored by someone we already follow (guardrail
383
+ # 2026-06-03). Same class as the posted-dedup above: an identity-based
384
+ # global skip, independent of project, so it lives here (before age math
385
+ # and scoring). followed_handles is the harvested set loaded once above
386
+ # (empty => gate inert, fail-open). enrich_twitter_candidates.py has
387
+ # already canonicalized tweet["handle"] to the author's screen_name.
388
+ _cand_handle = (tweet.get("handle") or "").strip().lstrip("@").lower()
389
+ if _cand_handle and _cand_handle in followed_handles:
390
+ skipped += 1
391
+ skipped_followed_author += 1
392
+ print(
393
+ f"[follow_gate] skip @{tweet.get('handle')} (followed) url={url}",
394
+ file=sys.stderr,
395
+ flush=True,
396
+ )
397
+ if _fgl:
398
+ _fgl.record_skip(_twitter_handle, _cand_handle, url, batch_id)
399
+ continue
400
+
401
+ # Calculate age
402
+ dt_str = tweet.get("datetime", "")
403
+ if dt_str:
404
+ try:
405
+ posted_at = datetime.fromisoformat(dt_str.replace("Z", "+00:00"))
406
+ age_hours = (datetime.now(timezone.utc) - posted_at).total_seconds() / 3600
407
+ except ValueError:
408
+ posted_at = None
409
+ age_hours = 24 # unknown age, penalize
410
+ else:
411
+ posted_at = None
412
+ age_hours = 24
413
+
414
+ tweet["age_hours"] = age_hours
415
+ tweet["author_followers"] = tweet.get("author_followers", 0)
416
+
417
+ # Hard age cutoff (2026-05-27): defense-in-depth against X's Latest tab
418
+ # silently degrading to "best available" results when our `since_time:`
419
+ # operator yields a sparse window. The pre-search hook
420
+ # (~/.claude/hooks/twitter-search-since-rewrite.py) injects
421
+ # `since_time:<now - FRESHNESS_HOURS_DISCOVER>` into every cycle query,
422
+ # and the harness scrape opens &f=live (Latest tab). In theory those
423
+ # two together cap age at the variant's freshness window. In practice
424
+ # x.com/search?f=live ignores `since_time:` on low-yield queries and
425
+ # falls back to whatever stale tweets it has. Without this cutoff,
426
+ # those stale rows land in twitter_candidates with virality ~0 (the
427
+ # 6h half-life decay floors them), survive into the post_twitter
428
+ # draft prompt, and get chosen when all candidates score near zero.
429
+ # We hard-drop here so they never reach the API, the draft prompt,
430
+ # or any per-row token spend. Reads the same env var the hook reads,
431
+ # so the cutoff matches the variant's window (1h for C/D, 6h for A/B).
432
+ # Falls back to 6h for non-cycle callers (legacy paths). Discovered
433
+ # 2026-05-27 after batches twcycle-20260527-134432 (Mediar) and
434
+ # twcycle-20260527-135430 (paperback-expert) posted under 49-77h-old
435
+ # threads that bypassed both layers.
436
+ try:
437
+ _freshness_cap = int(os.environ.get("FRESHNESS_HOURS_DISCOVER") or "6")
438
+ except ValueError:
439
+ _freshness_cap = 6
440
+ if age_hours > _freshness_cap:
441
+ skipped += 1
442
+ print(
443
+ f"[stale_age_skip] age_hours={age_hours:.1f} cap={_freshness_cap}h "
444
+ f"variant={os.environ.get('TWITTER_CYCLE_VARIANT') or ''} "
445
+ f"url={url}",
446
+ file=sys.stderr,
447
+ flush=True,
448
+ )
449
+ continue
450
+
451
+ # Tally kept_after_age for the verdict sidecar BEFORE the ceiling-D
452
+ # cap below. all_aged_out means "the freshness gate killed everything";
453
+ # ceiling-D is a quality filter that fires after age. Keeping the
454
+ # tallies separate prevents D-cycle queries from looking like aged-out
455
+ # to the next retry's drafter.
456
+ if scored_sidecar:
457
+ _q_age = (tweet.get("query") or "").strip()
458
+ if _q_age and _q_age in sidecar:
459
+ sidecar[_q_age]["kept_after_age"] += 1
460
+
461
+ # Variant D (2026-05-25): 2k-view ceiling cap on parent thread.
462
+ # Bucket analysis on 250+ mature posts showed view-share collapses
463
+ # from ~4% on 500-2k-view threads to ~0.1% on >10k-view threads —
464
+ # our reply is invisible to the audience of large threads. D drops
465
+ # any candidate whose T0 views exceed 2000; A/B/C let everything
466
+ # through unchanged. Comparing posted-quality (views/likes per
467
+ # surviving candidate) between D and C isolates the ceiling effect.
468
+ # No DB row written for rejects: the dashboard already groups by
469
+ # cycle_variant and the stderr marker below captures opportunity
470
+ # cost for later log-based analysis.
471
+ _ceiling_variant = os.environ.get("TWITTER_CYCLE_VARIANT") or ""
472
+ _ceiling_views = tweet.get("views", 0) or 0
473
+ if _ceiling_variant == "D" and _ceiling_views > 2000:
474
+ skipped += 1
475
+ print(
476
+ f"[ceiling_d_skip] views_t0={_ceiling_views} "
477
+ f"likes={tweet.get('likes', 0)} replies={tweet.get('replies', 0)} "
478
+ f"age_hours={age_hours:.2f} url={url}",
479
+ file=sys.stderr,
480
+ flush=True,
481
+ )
482
+ continue
483
+
484
+ score, velocity, rt_ratio = calculate_virality_score(tweet)
485
+
486
+ # Use LLM-assigned project if available, fall back to keyword matching
487
+ project = tweet.get("matched_project") or match_project(
488
+ tweet.get("text", ""),
489
+ tweet.get("search_topic", ""),
490
+ config,
491
+ )
492
+
493
+ # Skip threads Claude already explicitly rejected for THIS project
494
+ # (status='skipped'). Per-project: a thread skipped as fazm can still be
495
+ # picked if this scan matched it to a different project, so we key on
496
+ # (url, project) rather than url alone. Done here (not at the posted
497
+ # dedup above) because the project isn't resolved until this point.
498
+ if (url, project) in skipped_pairs:
499
+ skipped += 1
500
+ skipped_already_rejected += 1
501
+ print(
502
+ f" [skipped_already_rejected] {project}: {url}",
503
+ file=sys.stderr,
504
+ )
505
+ continue
506
+
507
+ body = {
508
+ "tweet_url": url,
509
+ "author_handle": tweet.get("handle", ""),
510
+ "author_followers": tweet.get("author_followers", 0),
511
+ "tweet_text": tweet.get("text", "") or "",
512
+ "tweet_posted_at": posted_at.isoformat() if posted_at else None,
513
+ "likes": tweet.get("likes", 0),
514
+ "retweets": tweet.get("retweets", 0),
515
+ "replies": tweet.get("replies", 0),
516
+ "views": tweet.get("views", 0),
517
+ "bookmarks": tweet.get("bookmarks", 0),
518
+ "engagement_velocity": velocity,
519
+ "retweet_ratio": rt_ratio,
520
+ "virality_score": score,
521
+ "search_topic": tweet.get("search_topic", ""),
522
+ "matched_project": project,
523
+ "batch_id": batch_id,
524
+ "discovery_batch_id": batch_id,
525
+ "cycle_variant": os.environ.get("TWITTER_CYCLE_VARIANT") or None,
526
+ # Stamp the machine's Twitter handle so the (tweet_url, our_account)
527
+ # composite unique gives each account its own candidate row.
528
+ # Without this, account A's 'posted' status on tweet X would lock
529
+ # account B out of the same tweet (ON CONFLICT preserved 'posted').
530
+ # Defaults server-side to 'm13v_' if omitted; new callers should
531
+ # always pass it explicitly.
532
+ "our_account": _twitter_handle or "",
533
+ # Repost provenance (2026-06-04). The scan derives the author from
534
+ # the status URL, so author_handle/tweet_url already point at the
535
+ # ORIGINAL tweet; is_repost flags that it surfaced via a repost and
536
+ # reposted_by names the account that reposted. Only sent when the
537
+ # scan evaluated it (presence-detected server-side).
538
+ "is_repost": bool(tweet.get("is_repost", False)),
539
+ "reposted_by": tweet.get("reposted_by", "") or "",
540
+ }
541
+ # Stamp the exact discovering search_attempt when the scanner gave us
542
+ # the literal query that surfaced this tweet AND the log script wrote
543
+ # an attempts map. Dashboard SQL prefers this column over the legacy
544
+ # (batch_id, project_name) fanout, which credits dud queries with
545
+ # posts they never surfaced.
546
+ _q = (tweet.get("query") or "").strip()
547
+ if _q and attempts_map:
548
+ attempt_id = attempts_map.get((_q, project))
549
+ if attempt_id is None:
550
+ attempt_id = attempts_map.get((_q, None))
551
+ if attempt_id is not None:
552
+ body["search_attempt_id"] = int(attempt_id)
553
+ # T0 columns only stamped when this row was discovered inside a cycle
554
+ # batch, mirroring the conditional in the original SQL.
555
+ if batch_id:
556
+ body["likes_t0"] = tweet.get("likes", 0)
557
+ body["retweets_t0"] = tweet.get("retweets", 0)
558
+ body["replies_t0"] = tweet.get("replies", 0)
559
+ body["views_t0"] = tweet.get("views", 0)
560
+ body["bookmarks_t0"] = tweet.get("bookmarks", 0)
561
+
562
+ try:
563
+ api_post("/api/v1/twitter-candidates", body)
564
+ inserted += 1
565
+ if scored_sidecar:
566
+ _q_kept = (tweet.get("query") or "").strip()
567
+ if _q_kept and _q_kept in sidecar:
568
+ sidecar[_q_kept]["kept_after_skip"] += 1
569
+ try:
570
+ _tweet_iso = body.get("tweet_posted_at") or body.get("tweet_created_at") or ""
571
+ _disc_iso = body.get("discovered_at") or body.get("created_at") or ""
572
+ _url = body.get("tweet_url") or body.get("url") or ""
573
+ _age_h = ""
574
+ if _tweet_iso and _disc_iso:
575
+ from datetime import datetime as _dt
576
+ try:
577
+ _t = _dt.fromisoformat(_tweet_iso.replace("Z", "+00:00"))
578
+ _d = _dt.fromisoformat(_disc_iso.replace("Z", "+00:00"))
579
+ _age_h = f"{(_d.timestamp() - _t.timestamp()) / 3600:.2f}"
580
+ except Exception:
581
+ _age_h = ""
582
+ print(
583
+ f"[twitter_discovery] batch_id={batch_id} "
584
+ f"discovery_batch_id={batch_id} "
585
+ f"cycle_variant={os.environ.get('TWITTER_CYCLE_VARIANT') or ''} "
586
+ f"tweet_age_hours={_age_h} discovered_at={_disc_iso} url={_url}",
587
+ file=sys.stderr,
588
+ flush=True,
589
+ )
590
+ except Exception:
591
+ pass
592
+ except SystemExit as e:
593
+ # http_api raises SystemExit on terminal failure. Keep iterating;
594
+ # the cycle should not die because one URL hit a 4xx validation
595
+ # edge case.
596
+ print(f" Error inserting {url}: {e}", file=sys.stderr)
597
+ continue
598
+
599
+ # Expire old pending candidates past the freshness window. This is a
600
+ # freshness GATE (status flip), not a delete — we keep the row forever
601
+ # for analytics.
602
+ api_post("/api/v1/twitter-candidates/expire-stale", {"freshness_hours": EXPIRE_FRESHNESS_HOURS})
603
+
604
+ # NO PRUNING. We keep every twitter_candidates row forever (chosen, skipped,
605
+ # expired) so we can audit project routing, skip reasons, growth dynamics,
606
+ # and engagement curves over time. Per user instruction (2026-05-08): never
607
+ # add DELETE-by-age back here, regardless of retention window.
608
+
609
+ if _fgl:
610
+ _fgl.record_cycle(_twitter_handle, len(followed_handles), _follow_source, len(tweets), skipped_followed_author, batch_id)
611
+ print(f"Scored: {inserted} upserted, {skipped} skipped (already posted or fabricated ID: {skipped_fake_id}, already rejected for project: {skipped_already_rejected}, followed authors: {skipped_followed_author})")
612
+
613
+ # Emit the verdict sidecar for the retry loop's directional feedback. Best
614
+ # effort: never fatal if the path is unwritable, never overwrites the
615
+ # cycle's other state.
616
+ if scored_sidecar:
617
+ try:
618
+ with open(scored_sidecar, "w") as fh:
619
+ json.dump(sidecar, fh)
620
+ print(
621
+ f"scored_sidecar: wrote {len(sidecar)} query verdicts -> {scored_sidecar}",
622
+ file=sys.stderr,
623
+ )
624
+ except OSError as e:
625
+ print(
626
+ f"scored_sidecar: could not write {scored_sidecar}: {e}",
627
+ file=sys.stderr,
628
+ )
629
+
630
+ return inserted
631
+
632
+
633
def _load_attempts_map(path):
    """Load the ``(query, project) -> attempt_id`` lookup from an attempts file.

    The file is expected to hold a JSON list of objects carrying ``query``,
    ``attempt_id`` and optionally ``project`` (the shape described by the
    ``--attempts`` help text). Loading is best effort: every problem is
    reported on stderr and never raised, so a damaged attempts file cannot
    abort the scoring run.

    Args:
        path: Filesystem path of the attempts JSON file, or a falsy value
            when ``--attempts`` was not given.

    Returns:
        A dict keyed by ``(query, project)`` tuples whose values are integer
        attempt ids; ``project`` is ``None`` for rows that carry none. The
        dict is empty when ``path`` is falsy, the file does not exist, or it
        cannot be read or parsed.
    """
    attempts_map = {}
    if not path or not os.path.exists(path):
        return attempts_map

    try:
        # JSON interchange is UTF-8; do not depend on the locale default,
        # search queries may contain non-ASCII characters.
        with open(path, encoding="utf-8") as f:
            rows = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(
            f"score_twitter_candidates: could not read attempts map "
            f"{path}: {e}",
            file=sys.stderr,
        )
        return attempts_map

    if not isinstance(rows, list):
        # A top-level scalar would raise TypeError when iterated, and a dict
        # would iterate its keys; neither is a usable attempts list.
        print(
            f"score_twitter_candidates: could not read attempts map "
            f"{path}: expected a JSON list, got {type(rows).__name__}",
            file=sys.stderr,
        )
        return attempts_map

    malformed = 0
    for r in rows:
        if not isinstance(r, dict):
            continue
        raw_query = r.get("query")
        aid = r.get("attempt_id")
        if raw_query is None or aid is None:
            continue
        if not isinstance(raw_query, str):
            malformed += 1
            continue
        q = raw_query.strip()
        if not q:
            continue
        proj = r.get("project") or None
        try:
            # int() rejects junk ids (ValueError/TypeError); an unhashable
            # project makes the tuple key raise TypeError. Either way only
            # this row is dropped, the remaining rows are still loaded.
            attempts_map[(q, proj)] = int(aid)
        except (TypeError, ValueError):
            malformed += 1

    print(
        f"score_twitter_candidates: loaded {len(attempts_map)} "
        f"(query, project) -> attempt_id entries from {path}",
        file=sys.stderr,
    )
    if malformed:
        print(
            f"score_twitter_candidates: ignored {malformed} malformed "
            f"attempt rows in {path}",
            file=sys.stderr,
        )
    return attempts_map


def main():
    """Command-line entry point: score scanned tweets and upsert candidates.

    Parses the CLI flags, loads ``~/social-autoposter/config.json`` when it
    exists, and then runs one of two modes:

    * ``--expire-only``: ask the API to expire stale pending candidates,
      print the reported count, and return without reading any tweets.
    * default: read tweets as JSON from ``--file`` or stdin (a single object
      is wrapped into a one-element list), load the optional ``--attempts``
      map, and hand everything to ``upsert_candidates``.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="Read tweets from JSON file instead of stdin")
    parser.add_argument(
        "--expire-only",
        action="store_true",
        help="Only expire stale pending rows (status flip; no row deletion)",
    )
    parser.add_argument(
        "--batch-id",
        help="Tag these candidates with a batch id and populate T0 columns",
    )
    parser.add_argument(
        "--attempts",
        help="Path to JSON list [{query, project, attempt_id}, ...] from "
             "log_twitter_search_attempts.py --attempts-out. When provided, "
             "stamps twitter_candidates.search_attempt_id per tweet so the "
             "dashboard can attribute posts to the exact discovering query.",
    )
    parser.add_argument(
        "--scored-sidecar",
        help="Path to write per-query verdict tallies for the retry loop "
             "feedback (2026-05-28). Shape: {query: {raw, kept_after_age, "
             "kept_after_skip}, ...}. Consumed by run-twitter-cycle.sh to "
             "build the directional TRIED_QUERIES_JSON for the next attempt's "
             "drafter prompt.",
    )
    args = parser.parse_args()

    # A missing config file is tolerated and leaves the config empty.
    config_path = os.path.expanduser("~/social-autoposter/config.json")
    config = {}
    if os.path.exists(config_path):
        with open(config_path, encoding="utf-8") as f:
            config = json.load(f)

    if args.expire_only:
        # Freshness gate only. NO PRUNING — see note in upsert_candidates().
        # Server-side route runs the same UPDATE atomically; client just kicks
        # it off and prints the count.
        resp = api_post(
            "/api/v1/twitter-candidates/expire-stale",
            {"freshness_hours": EXPIRE_FRESHNESS_HOURS},
        )
        expired = (resp.get("data") or {}).get("expired_count", 0)
        print(f"Expired {expired} old pending candidates (no row deletion)")
        return

    if args.file:
        # Tweet text regularly contains emoji and other non-ASCII characters,
        # so read the file as UTF-8 instead of the locale default encoding.
        with open(args.file, encoding="utf-8") as f:
            tweets = json.load(f)
    else:
        tweets = json.load(sys.stdin)

    # Accept a single tweet object as well as a list of tweets.
    if not isinstance(tweets, list):
        tweets = [tweets]

    attempts_map = _load_attempts_map(args.attempts)

    upsert_candidates(
        tweets,
        config,
        batch_id=args.batch_id,
        attempts_map=attempts_map,
        scored_sidecar=args.scored_sidecar,
    )
715
+
716
+
717
+ if __name__ == "__main__":  # run the CLI only when executed as a script, not when imported
718
+ main()