@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,2668 @@
1
+ #!/usr/bin/env python3
2
+ """Reddit posting orchestrator.
3
+
4
+ Spawns a Claude session per post that uses reddit_tools.py (search, fetch) to find
5
+ threads and drafts replies. Python orchestrator handles CDP posting and DB logging.
6
+
7
+ Usage:
8
+ python3 scripts/post_reddit.py
9
+ python3 scripts/post_reddit.py --dry-run # Print prompt without executing
10
+ python3 scripts/post_reddit.py --limit 3 # Post at most 3 comments
11
+ python3 scripts/post_reddit.py --timeout 3600 # Global timeout in seconds
12
+ python3 scripts/post_reddit.py --project Cyrano # Override project selection
13
+ """
14
+
15
+ from __future__ import annotations # PEP 604 unions (str | None) for Python 3.9 launchd
16
+
17
+ import argparse
18
+ import errno
19
+ import json
20
+ import os
21
+ import random
22
+ import re
23
+ import shutil
24
+ import subprocess
25
+ import sys
26
+ import time
27
+ import uuid
28
+
29
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
30
+ from http_api import api_get, api_post, api_patch
31
+ from author_history_block import render as _render_author_history
32
+ from project_topics import topics_for_project
33
+
34
# Filesystem locations of the checkout and of the helper scripts this
# orchestrator refers to.
REPO_DIR = os.path.expanduser("~/social-autoposter")
CONFIG_PATH = os.path.join(REPO_DIR, "config.json")
REDDIT_BROWSER = os.path.join(REPO_DIR, "scripts", "reddit_browser.py")
REDDIT_BROWSER_LOCK = os.path.join(REPO_DIR, "scripts", "reddit_browser_lock.py")
REDDIT_TOOLS = os.path.join(REPO_DIR, "scripts", "reddit_tools.py")

# Interpreter every child subprocess must run under. An interpreter name
# looked up on PATH resolved to the user's system python, which lacks the
# pipeline deps (Playwright and friends) that live only in the owned uv
# runtime — so on a fresh box every reddit_browser.py reply died (the same
# class as the Karol/Twitter bug, 2026-06-22). Honor the authoritative
# S4L_PYTHON pin (set by the launchd plist), else sys.executable (the owned
# interpreter the MCP launches us under). Never a bare interpreter name
# resolved through PATH: that re-rolls the PATH dice. Re-exported so
# grandchildren inherit it.
PYTHON = os.environ.get("S4L_PYTHON") or sys.executable
os.environ["S4L_PYTHON"] = PYTHON
# Quota state file written by _probe_reddit_quota (keys: remaining, reset_at).
RATELIMIT_FILE = "/tmp/reddit_ratelimit.json"
# Default budget_seconds for preflight_rate_limit.
PREFLIGHT_WAIT_BUDGET_SECONDS = 180
52
+
53
# ---------------------------------------------------------------------------
# reddit_candidates queue parameters (mirrors twitter_candidates intent).
#
# 2026-05-06: persistent queue replaces the ephemeral tmpfile-only flow so
# transient post failures (CDP timeout, comment_box_not_found, browser crash)
# get retried on the next cycle's Phase 0 salvage rather than losing the
# discover+ripen+draft cost as wholesale waste. Permanent failures
# (thread_locked at submit time, archived, deleted, account_blocked) get
# marked status='failed' so we never re-evaluate them.
#
# Window choices:
#   FRESHNESS_HOURS=24    Reddit threads stay actionable longer than tweets
#                         (FRESHNESS_HOURS=6 on Twitter), so the hard-expire
#                         cutoff is wider. Past 24h the comment is unlikely
#                         to be seen.
#   MAX_ATTEMPTS=3        Cap retry budget so a chronically-broken thread
#                         (subreddit gone private mid-cycle, AutoMod glitch)
#                         drops out instead of recurring forever.
#   RETRY_BACKOFF_MIN=30  Don't re-attempt a freshly-failed candidate within
#                         the same 15-min cycle; let the failure reason
#                         stabilize before retrying.
#   DRAFT_TTL_MIN=60      A salvaged candidate whose draft was written < 60
#                         min ago re-uses it as-is (skips LLM redraft). Keeps
#                         us from paying $0.20-$0.40 of Claude cost twice on
#                         the same comment when the post step retries.
FRESHNESS_HOURS = 24
MAX_ATTEMPTS = 3
RETRY_BACKOFF_MIN = 30
DRAFT_TTL_MIN = 60

# Discover-phase search budget. Was hardcoded as "AT MOST 2 searches" inline
# in build_discover_prompt; bumped to 10 (2026-05-08) so each cycle gets a
# wider top-of-funnel and the new draft-gate-omit feedback report can steer
# rephrasings without starving the next attempt of fresh angles. Override via
# the S4L_REDDIT_MAX_SEARCHES env var without a code change.
# NOTE(review): the text above says the budget was bumped to 10, but the
# fallback default below is "3" — confirm which value is intended.
# A non-integer value in the env var raises ValueError at import time.
MAX_DISCOVER_SEARCHES = int(os.environ.get("S4L_REDDIT_MAX_SEARCHES", "3"))
89
+
90
# CDP-error → permanence map. Permanent failures mark status='failed' and are
# never re-evaluated. Transient failures stay status='pending' with
# attempt_count++; Phase 0 salvages them on the next cycle.

# Failure reasons that must never be retried.
_PERMANENT_CDP_ERRORS = {
    "thread_locked",
    "thread_archived",
    "thread_not_found",
    "account_blocked_in_sub",
    "no_permalink",  # we couldn't verify the post landed; retrying would dupe
}
# Failure reasons that are eligible for a later retry.
_TRANSIENT_CDP_ERRORS = {
    "all_attempts_failed",
    "comment_box_not_found",
    "not_logged_in",
}
105
+
106
+ from engagement_styles import (
107
+ VALID_STYLES, get_styles_prompt, get_content_rules, validate_or_register,
108
+ pick_style_for_post, get_voice_relationship_rule,
109
+ )
110
+ # Audience-page routing: tells Claude which curated landing pages exist for the
111
+ # project so it can bake a deep URL (e.g. https://s4l.ai/ghostwriting) into the
112
+ # draft when the thread topic matches. See scripts/audience_pages.py + the
113
+ # landing_pages.audience_pages block in config.json.
114
+ from audience_pages import (
115
+ prompt_block as _audience_prompt_block,
116
+ classify_url_as_audience_page as _audience_classify_url,
117
+ )
118
+
119
+
120
+ # ---------------------------------------------------------------------------
121
+ # reddit_candidates helpers.
122
+ #
123
+ # All DB-touching helpers swallow exceptions and log to stderr. The pipeline
124
+ # remains functional even if the queue table is unreachable; we just lose the
125
+ # salvage benefit for that cycle. This matches the cautious posture of
126
+ # log_post / campaign_bump / log_draft elsewhere in the file.
127
+
128
+ def _subreddit_from_url(thread_url):
129
+ """Pull the bare subreddit name out of a Reddit thread URL, or None."""
130
+ if not thread_url:
131
+ return None
132
+ m = re.search(r"/r/([^/]+)/", thread_url)
133
+ return m.group(1).lower() if m else None
134
+
135
+
136
+ def _db_upsert_discovered_candidate(candidate, batch_id, project_name):
137
+ """INSERT a freshly-discovered candidate row via /api/v1/reddit-candidates.
138
+
139
+ Server-side ON CONFLICT keeps the existing row's status, attempt_count,
140
+ post linkage, AND original T0 intact (see route source); batch_id is
141
+ updated to the current cycle so the dashboard's queue counts surface
142
+ this run.
143
+ """
144
+ thread_url = (candidate.get("thread_url") or "").strip()
145
+ if not thread_url:
146
+ return
147
+ try:
148
+ score_raw = candidate.get("score")
149
+ comments_raw = candidate.get("num_comments")
150
+ body = {
151
+ "thread_url": thread_url,
152
+ "thread_author": candidate.get("thread_author"),
153
+ "thread_title": candidate.get("thread_title"),
154
+ "thread_selftext": candidate.get("selftext") or candidate.get("thread_selftext"),
155
+ "subreddit": _subreddit_from_url(thread_url),
156
+ "matched_project": project_name,
157
+ "search_topic": candidate.get("search_topic"),
158
+ "batch_id": batch_id,
159
+ "draft_engagement_style": candidate.get("engagement_style"),
160
+ "score_t0": int(score_raw) if score_raw is not None else None,
161
+ "comments_t0": int(comments_raw) if comments_raw is not None else None,
162
+ }
163
+ api_post("/api/v1/reddit-candidates", body)
164
+ except Exception as e:
165
+ print(f"[post_reddit] WARNING: upsert candidate failed for {thread_url}: {e}",
166
+ file=sys.stderr)
167
+
168
+
169
+ def _db_save_draft(thread_url, text, engagement_style):
170
+ """Persist a freshly-written draft so a later salvage reuses it.
171
+
172
+ Routes through /api/v1/reddit-candidates/by-thread-url action=save_draft.
173
+ Returns 404 silently when there is no pending row for the URL (e.g. when
174
+ the discover-side INSERT race hadn't completed yet); a save_draft on a
175
+ row that already moved past 'pending' would be a no-op anyway.
176
+ """
177
+ if not thread_url or not text:
178
+ return
179
+ try:
180
+ api_patch(
181
+ "/api/v1/reddit-candidates/by-thread-url",
182
+ {
183
+ "thread_url": thread_url,
184
+ "action": "save_draft",
185
+ "draft_text": text,
186
+ "draft_engagement_style": engagement_style,
187
+ },
188
+ ok_on_404=True,
189
+ )
190
+ except Exception as e:
191
+ print(f"[post_reddit] WARNING: save_draft failed for {thread_url}: {e}",
192
+ file=sys.stderr)
193
+
194
+
195
+ def _db_load_fresh_draft(thread_url):
196
+ """Return (text, style) for a still-fresh draft, or (None, None).
197
+
198
+ Calls /api/v1/reddit-candidates?thread_url=...&has_fresh_draft=true&fresh_draft_minutes=N
199
+ so the server enforces the TTL window at the SQL level.
200
+ """
201
+ if not thread_url:
202
+ return None, None
203
+ try:
204
+ resp = api_get(
205
+ "/api/v1/reddit-candidates",
206
+ query={
207
+ "thread_url": thread_url,
208
+ "has_fresh_draft": "true",
209
+ "fresh_draft_minutes": DRAFT_TTL_MIN,
210
+ "limit": 1,
211
+ },
212
+ )
213
+ rows = ((resp or {}).get("data") or {}).get("candidates") or []
214
+ if rows:
215
+ r = rows[0]
216
+ return r.get("draft_text"), r.get("draft_engagement_style")
217
+ except Exception as e:
218
+ print(f"[post_reddit] WARNING: load_fresh_draft failed for {thread_url}: {e}",
219
+ file=sys.stderr)
220
+ return None, None
221
+
222
+
223
+ def _db_mark_candidate_posted(thread_url, post_id):
224
+ """Mark a candidate as successfully posted with linkage to posts.id.
225
+
226
+ The server-side action=mark_posted runs the same two recovery layers as
227
+ the previous Python implementation: if post_id is NULL, it first tries
228
+ `SELECT id FROM posts WHERE thread_url=...` to recover, then falls back
229
+ to status='failed' with last_failure_reason='log_post_returned_null'.
230
+ See scripts/post_reddit.py CLAUDE.md commentary for the rationale.
231
+ """
232
+ if not thread_url:
233
+ return
234
+ try:
235
+ body = {"thread_url": thread_url, "action": "mark_posted"}
236
+ if post_id is not None:
237
+ body["post_id"] = int(post_id)
238
+ resp = api_patch(
239
+ "/api/v1/reddit-candidates/by-thread-url",
240
+ body,
241
+ ok_on_404=True,
242
+ )
243
+ data = (resp or {}).get("data") or {}
244
+ if data.get("recovery") == "marked_failed_no_post_id":
245
+ print(
246
+ f"[post_reddit] WARNING: log_post returned None and posts.thread_url "
247
+ f"lookup failed for {thread_url}. Marked status='failed' to prevent "
248
+ f"Phase 0 re-post (would dupe). Comment is live on Reddit; backfill "
249
+ f"required for click attribution.",
250
+ file=sys.stderr,
251
+ )
252
+ elif data.get("recovery") == "ok" and post_id is None:
253
+ # Server-side recovery succeeded — log for parity with the prior
254
+ # Python WARNING so dashboard ingestion is unchanged.
255
+ recovered = ((data.get("candidate") or {}).get("post_id"))
256
+ print(
257
+ f"[post_reddit] WARNING: recovered post_id={recovered} via posts.thread_url "
258
+ f"after log_post returned None for {thread_url}",
259
+ file=sys.stderr,
260
+ )
261
+ except Exception as e:
262
+ print(f"[post_reddit] WARNING: mark_posted failed for {thread_url}: {e}",
263
+ file=sys.stderr)
264
+
265
+
266
+ def _db_mark_candidate_attempt(thread_url, reason, permanent=False):
267
+ """Record a failed post attempt via /api/v1/reddit-candidates/by-thread-url.
268
+
269
+ Server-side action=mark_attempt mirrors the previous Python branching
270
+ (permanent vs transient with auto-promote at MAX_ATTEMPTS).
271
+ """
272
+ if not thread_url:
273
+ return
274
+ try:
275
+ api_patch(
276
+ "/api/v1/reddit-candidates/by-thread-url",
277
+ {
278
+ "thread_url": thread_url,
279
+ "action": "mark_attempt",
280
+ "reason": reason,
281
+ "permanent": bool(permanent),
282
+ "max_attempts": MAX_ATTEMPTS,
283
+ },
284
+ ok_on_404=True,
285
+ )
286
+ except Exception as e:
287
+ print(f"[post_reddit] WARNING: mark_attempt failed for {thread_url}: {e}",
288
+ file=sys.stderr)
289
+
290
+
291
def _db_phase0_salvage(batch_id, freshness_hours=FRESHNESS_HOURS,
                       max_attempts=MAX_ATTEMPTS,
                       retry_backoff_min=RETRY_BACKOFF_MIN):
    """Run Phase 0 through /api/v1/reddit-candidates/phase0-salvage.

    The three window arguments are coerced to int before being sent.

    Returns:
        (expired_count, salvaged_count) as ints taken from the response's
        "data" object, with missing values counted as 0. On any exception the
        failure is logged to stderr and (0, 0) is returned.
    """
    try:
        payload = {
            "batch_id": batch_id,
            "freshness_hours": int(freshness_hours),
            "max_attempts": int(max_attempts),
            "retry_backoff_minutes": int(retry_backoff_min),
        }
        resp = api_post("/api/v1/reddit-candidates/phase0-salvage", payload)
        counts = (resp or {}).get("data") or {}
        expired = int(counts.get("expired_count") or 0)
        salvaged = int(counts.get("salvaged_count") or 0)
    except Exception as e:
        print(f"[post_reddit] WARNING: phase0 salvage failed: {e}",
              file=sys.stderr)
        return 0, 0
    return expired, salvaged
315
+
316
+
317
+ def _db_pick_salvage_candidates(batch_id, limit=1):
318
+ """Pull up to `limit` salvage-eligible rows from a SINGLE project.
319
+
320
+ Routes through /api/v1/reddit-candidates/pick-salvage, which performs
321
+ the same two-step (project picker + atomic claim) inside a single PG
322
+ transaction. The route stamps last_attempt_at=NOW() at pick-time using
323
+ FOR UPDATE SKIP LOCKED so two concurrent post phases can never re-pick
324
+ the same row. See route source for the full SQL.
325
+
326
+ Returns {project_name, decisions:[...], cost:0, salvaged:True, ...} or
327
+ None if no eligible row remains.
328
+ """
329
+ limit = max(1, int(limit or 1))
330
+ try:
331
+ resp = api_post(
332
+ "/api/v1/reddit-candidates/pick-salvage",
333
+ {
334
+ "batch_id": batch_id,
335
+ "max_attempts": MAX_ATTEMPTS,
336
+ "draft_ttl_minutes": DRAFT_TTL_MIN,
337
+ "limit": limit,
338
+ },
339
+ )
340
+ data = (resp or {}).get("data") or {}
341
+ if not data.get("decisions"):
342
+ return None
343
+ return {
344
+ "project_name": data.get("project_name") or "general",
345
+ "decisions": data.get("decisions") or [],
346
+ "cost": float(data.get("cost") or 0.0),
347
+ "salvaged": bool(data.get("salvaged", True)),
348
+ "salvaged_attempt": int(data.get("salvaged_attempt") or 0),
349
+ "salvaged_count": int(data.get("salvaged_count") or 0),
350
+ }
351
+ except Exception as e:
352
+ print(f"[post_reddit] WARNING: pick_salvage_candidates failed: {e}",
353
+ file=sys.stderr)
354
+ return None
355
+
356
+
357
+ # Back-compat shim: older callers (and tests) may still call the singular
358
+ # name. Routes through the multi-row helper with limit=1 so we don't keep
359
+ # two SQL paths in sync.
360
def _db_pick_salvage_candidate(batch_id):
    """Singular-name shim: return _db_pick_salvage_candidates(batch_id, limit=1)."""
    return _db_pick_salvage_candidates(batch_id, limit=1)
362
+
363
+
364
+ def _apply_rate_limit_policy(remaining, reset_seconds, source, budget_seconds):
365
+ """Given current quota, decide: proceed (True), wait then proceed, or skip (False)."""
366
+ if remaining > 2 or reset_seconds <= 0:
367
+ return True
368
+ if reset_seconds > budget_seconds:
369
+ print(f"[post_reddit] Reddit rate-limited ({source}), reset in "
370
+ f"{int(reset_seconds)}s (> {budget_seconds}s budget). Skipping run.")
371
+ return False
372
+ wait = int(reset_seconds) + 3
373
+ print(f"[post_reddit] Reddit rate-limited ({source}), waiting {wait}s "
374
+ f"for reset before spawning Claude...")
375
+ time.sleep(wait)
376
+ return True
377
+
378
+
379
def _probe_reddit_quota():
    """Issue one cheap request to Reddit to learn the live quota.

    Reads the X-Ratelimit-Remaining / X-Ratelimit-Reset response headers
    and writes {"remaining", "reset_at"} to RATELIMIT_FILE so other readers
    of that file see fresh state. An HTTP 429 is reported as zero remaining
    quota.

    Returns:
        (remaining, reset_seconds) as floats, or None when the request
        failed for any reason other than a 429 or the success-path headers
        could not be parsed.
    """
    import urllib.error
    import urllib.request

    def _persist(remaining, reset):
        # Best-effort: a failed cache write must not discard a live probe
        # result, and must not escape from inside the 429 handler.
        try:
            with open(RATELIMIT_FILE, "w") as f:
                json.dump({"remaining": remaining, "reset_at": time.time() + reset}, f)
        except OSError as exc:
            print(f"[post_reddit] WARNING: could not write rate-limit state: {exc}",
                  file=sys.stderr)

    url = "https://old.reddit.com/r/popular.json?limit=1"
    req = urllib.request.Request(
        url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"}
    )
    try:
        # Context manager closes the connection once the headers are read.
        with urllib.request.urlopen(req, timeout=10) as resp:
            remaining = float(resp.headers.get("X-Ratelimit-Remaining", 100))
            reset = float(resp.headers.get("X-Ratelimit-Reset", 0))
    except urllib.error.HTTPError as e:
        # HTTPError doubles as a response object. Read what we need and
        # close it; nothing raised here is covered by the sibling
        # `except Exception`, so this branch handles its own failures.
        try:
            if e.code != 429:
                return None
            try:
                reset = float(e.headers.get("X-Ratelimit-Reset", 60))
            except (AttributeError, TypeError, ValueError):
                # Missing or malformed header: use the same 60s default.
                reset = 60.0
        finally:
            try:
                e.close()
            except Exception:
                pass
        _persist(0, reset)
        return 0.0, reset
    except Exception:
        return None
    _persist(remaining, reset)
    return remaining, reset
407
+
408
+
409
def preflight_rate_limit(budget_seconds=PREFLIGHT_WAIT_BUDGET_SECONDS):
    """Block or bail before spawning Claude if Reddit search is throttled.

    Strategy:
      1. Probe Reddit once for the live rate-limit headers. This catches
         the case where the shared state file is stale but the server
         still throttles us.
      2. If the probe fails, fall back to the cached state in
         RATELIMIT_FILE.

    A missing, unreadable or malformed cache (non-dict payload,
    non-numeric values) is treated as "no information" and the run
    proceeds.

    Args:
        budget_seconds: longest time this call may sleep waiting for the
            quota to reset.

    Returns:
        True when the run may proceed (possibly after sleeping), False
        when it should be skipped.
    """
    probe = _probe_reddit_quota()
    if probe is not None:
        remaining, reset = probe
        print(f"[post_reddit] Reddit quota probe: remaining={remaining:.0f} "
              f"reset_in={int(reset)}s")
        return _apply_rate_limit_policy(remaining, reset, "probe", budget_seconds)
    try:
        with open(RATELIMIT_FILE) as f:
            rl = json.load(f)
        # Coerce inside the try so a syntactically valid but malformed
        # cache cannot crash the preflight.
        remaining = float(rl.get("remaining", 100))
        wait = int(float(rl.get("reset_at", 0)) - time.time())
    except Exception:
        return True
    return _apply_rate_limit_policy(remaining, wait, "cached", budget_seconds)
435
+
436
+
437
+ # ---------------------------------------------------------------------------
438
+ # subreddit_bans audit shape (introduced 2026-05-11)
439
+ # ---------------------------------------------------------------------------
440
+ # Each entry in subreddit_bans.comment_blocked / .thread_blocked is now an
441
+ # object with the audit metadata we wished we'd been recording all along:
442
+ # {"sub": "powerbi", "added_at": "2026-05-11T00:31:49Z",
443
+ # "reason": "account_blocked_in_sub",
+ # "noticed_by_project": "WhatsApp MCP", "account": "<reddit username or null>"}
444
+ #
445
+ # Pre-migration entries are bare strings; the readers/writers handle both
446
+ # shapes transparently. The migration script
447
+ # (scripts/migrate_subreddit_bans_to_objects.py) backfills existing strings to
448
+ # objects with null metadata.
449
+ #
450
+ # _ban_entry_sub(entry): extract the sub slug from either shape (returns
451
+ # lowercase string or None).
452
+ # _ban_entries_to_subs(L): set of lowercase sub slugs in a ban list.
453
+ # _make_ban_entry(...): build a fresh entry with current UTC timestamp.
454
+
455
+ def _ban_entry_sub(entry) -> str | None:
456
+ """Return the lowercased sub slug from a ban-list entry (str or dict)."""
457
+ if isinstance(entry, str):
458
+ s = entry.strip().lower()
459
+ return s or None
460
+ if isinstance(entry, dict):
461
+ s = (entry.get("sub") or "").strip().lower()
462
+ return s or None
463
+ return None
464
+
465
+
466
def _ban_entries_to_subs(entries) -> set[str]:
    """Return the set of lowercased sub slugs in a ban list.

    `entries` may be None or empty. Entries that yield no slug are skipped.
    """
    slugs = (_ban_entry_sub(item) for item in entries or [])
    return {slug for slug in slugs if slug}
473
+
474
+
475
+ def _make_ban_entry(sub: str, reason: str | None, project: str | None) -> dict:
476
+ """Build a new ban-list entry with the current UTC timestamp.
477
+
478
+ Stamps the current Reddit account (top-level config.json reddit_account
479
+ .username) so per-account scoping in reddit_tools._load_comment_blocked_subs
480
+ can ignore this entry on other machines posting as a different account.
481
+ Returns account=None if the config has no reddit_account, in which case
482
+ the reader treats the entry as global (back-compat with pre-2026-05-15).
483
+
484
+ Project scope (2026-05-19 cleanup): subreddit_bans.comment_blocked entries
485
+ are ALWAYS account-level by definition: if a sub silently strips the
486
+ comment form (or other account-triggered automod gate) for our account,
487
+ that gate applies regardless of which project's pipeline noticed it.
488
+ Project-specific relevance rejects live in `project_search_excludes`,
489
+ NOT here. So we drop the `project` field semantically (kept as audit
490
+ breadcrumb `noticed_by_project` for forensics, but the reader ignores
491
+ it). Account is the only scope dimension.
492
+ """
493
+ from datetime import datetime, timezone
494
+ account = None
495
+ try:
496
+ with open(CONFIG_PATH) as _f:
497
+ account = (json.load(_f).get("reddit_account") or {}).get("username") or None
498
+ except Exception:
499
+ pass
500
+ return {
501
+ "sub": sub.strip().lower(),
502
+ "added_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
503
+ "reason": reason or None,
504
+ # Kept for audit (who first hit this); reader ignores. Use `account`
505
+ # for actual scoping.
506
+ "noticed_by_project": project or None,
507
+ "account": account,
508
+ }
509
+
510
+
511
+ def mark_comment_blocked(thread_url: str,
512
+ reason: str | None = "account_blocked_in_sub",
513
+ project: str | None = None) -> None:
514
+ """Add a subreddit to config.json subreddit_bans.comment_blocked at runtime.
515
+
516
+ Called when the bot's comment attempt is rejected (no comment form, locked,
517
+ restricted). The sub gets blocked for future comment attempts so the
518
+ drafter never targets it again. Thread-posting eligibility is tracked
519
+ separately in subreddit_bans.thread_blocked.
520
+
521
+ Records audit metadata (added_at / reason / project) on the entry.
522
+ """
523
+ sub_match = re.search(r'/r/([^/]+)/', thread_url)
524
+ if not sub_match:
525
+ return
526
+ sub = sub_match.group(1).lower()
527
+ try:
528
+ with open(CONFIG_PATH) as f:
529
+ config = json.load(f)
530
+ bans = config.setdefault("subreddit_bans", {})
531
+ blocked = bans.setdefault("comment_blocked", [])
532
+ existing = _ban_entries_to_subs(blocked)
533
+ if sub not in existing:
534
+ blocked.append(_make_ban_entry(sub, reason, project))
535
+ blocked.sort(key=lambda e: _ban_entry_sub(e) or "")
536
+ with open(CONFIG_PATH, "w") as f:
537
+ json.dump(config, f, indent=2)
538
+ f.write("\n")
539
+ print(f"[post_reddit] Added r/{sub} to subreddit_bans.comment_blocked "
540
+ f"(reason={reason!r} project={project!r})")
541
+ except Exception as e:
542
+ print(f"[post_reddit] WARNING: could not persist blocked sub r/{sub}: {e}")
543
+
544
+
545
+ # Keywords that indicate a permanent account/subreddit block rather than a
546
+ # transient failure. Case-insensitive match against Claude's abort_reason.
547
+ # Tuned 2026-04-29: broaden to catch mod-rule bans expressed in present tense
548
+ # ("the sub bans software", "no software allowed") in addition to account-level
549
+ # bans ("u/X has been banned"). Each new pattern observed from real abort logs.
550
+ _THREAD_BLOCK_PATTERNS = [
551
+ r"\bbanned\b",
552
+ r"\bbans\b\s+(all|any|every|every kind|posts?|comments?|software|websites?|self[- ]promo|advertising|promotional)",
553
+ r"\bban\b.*\b(software|posts?|websites?|self[- ]promo|advertising)\b",
554
+ r"access was denied",
555
+ r"\b403\b",
556
+ r"link[- ]only",
557
+ r"text posts? (are )?disabled",
558
+ r"text (tab|option) (is )?disabled",
559
+ r"does not allow text",
560
+ r"not allowed to post",
561
+ r"posting.*restricted",
562
+ r"no (software|self[- ]promo|promotional|advertising|ads)",
563
+ r"\bprohibit(ed|s)?\b",
564
+ r"\bremoved\b.*\b(rule|mod)\b", # "would be removed per rule X"
565
+ r"would (get )?removed",
566
+ r"\bnot permitted\b",
567
+ r"approved (submitter|user)s? only",
568
+ r"forbidden",
569
+ ]
570
+
571
+ def _abort_is_permanent_block(abort_reason: str) -> bool:
572
+ """Return True if abort_reason signals a permanent account/sub block."""
573
+ lower = abort_reason.lower()
574
+ for pat in _THREAD_BLOCK_PATTERNS:
575
+ if re.search(pat, lower):
576
+ return True
577
+ return False
578
+
579
+
580
+ def mark_thread_blocked(subreddit: str, abort_reason: str = "",
581
+ project: str | None = None,
582
+ force: bool = False) -> None:
583
+ """Add a subreddit to config.json subreddit_bans.thread_blocked at runtime.
584
+
585
+ Called when a thread-post attempt is permanently blocked (account banned,
586
+ link-only sub, text posts disabled, 403). The sub is skipped by
587
+ pick_thread_target.py on all future runs. Comment eligibility is tracked
588
+ separately in subreddit_bans.comment_blocked.
589
+
590
+ subreddit may be bare ('programming') or prefixed ('r/programming').
591
+
592
+ Records audit metadata (added_at / reason / project) on the entry.
593
+ The reason field captures the abort_reason verbatim (truncated to 280
594
+ chars) so we can audit why the sub got blocked months later.
595
+
596
+ force=True bypasses the abort_reason regex gate (used when an upstream
597
+ signal — e.g. the model's permanent_block=true — has already decided
598
+ this is permanent and the reason text alone wouldn't match the patterns).
599
+ """
600
+ sub = re.sub(r"^r/", "", subreddit, flags=re.IGNORECASE).strip().lower()
601
+ if not sub:
602
+ return
603
+ if not force and abort_reason and not _abort_is_permanent_block(abort_reason):
604
+ return
605
+ reason_str: str | None = (abort_reason or "").strip()[:280] or None
606
+ try:
607
+ with open(CONFIG_PATH) as f:
608
+ config = json.load(f)
609
+ bans = config.setdefault("subreddit_bans", {})
610
+ blocked = bans.setdefault("thread_blocked", [])
611
+ existing = _ban_entries_to_subs(blocked)
612
+ if sub not in existing:
613
+ blocked.append(_make_ban_entry(sub, reason_str, project))
614
+ blocked.sort(key=lambda e: _ban_entry_sub(e) or "")
615
+ with open(CONFIG_PATH, "w") as f:
616
+ json.dump(config, f, indent=2)
617
+ f.write("\n")
618
+ print(f"[post_reddit] Auto-blocked r/{sub} from future thread posts "
619
+ f"(reason={reason_str!r} project={project!r})")
620
+ else:
621
+ print(f"[post_reddit] r/{sub} already in thread_blocked, skipping")
622
+ except Exception as e:
623
+ print(f"[post_reddit] WARNING: could not persist thread-blocked sub r/{sub}: {e}")
624
+
625
+
626
def load_config():
    """Read CONFIG_PATH and return its parsed JSON content."""
    with open(CONFIG_PATH) as handle:
        parsed = json.load(handle)
    return parsed
629
+
630
+
631
def pick_project(platform="reddit", exclude=None):
    """Run scripts/pick_project.py and return its parsed JSON output.

    Args:
        platform: value passed as --platform.
        exclude: optional iterable of names, passed comma-joined as
            --exclude.

    Returns:
        The decoded JSON from the script's stdout, or None when the script
        exits non-zero, prints nothing, times out (15s) or anything else
        goes wrong.
    """
    try:
        script = os.path.join(REPO_DIR, "scripts", "pick_project.py")
        argv = [PYTHON, script, "--platform", platform, "--json"]
        if exclude:
            argv += ["--exclude", ",".join(exclude)]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        output = proc.stdout.strip()
        if proc.returncode == 0 and output:
            return json.loads(output)
    except Exception:
        # Best-effort helper: every failure maps to "no pick".
        pass
    return None
643
+
644
+
645
def get_top_performers(project_name, platform="reddit", style=None):
    """Return the text report printed by scripts/top_performers.py.

    Args:
        project_name: value passed as --project.
        platform: value passed as --platform.
        style: when truthy, passed as --style so the report is restricted
            to that style (2026-05-19). When None the script is run
            without --style (the legacy full report).

    Returns:
        The script's stripped stdout on exit code 0; "" on a non-zero
        exit, a timeout (15s) or any other failure.
    """
    try:
        script = os.path.join(REPO_DIR, "scripts", "top_performers.py")
        argv = [PYTHON, script, "--platform", platform, "--project", project_name]
        if style:
            argv += ["--style", style]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode == 0:
            return proc.stdout.strip()
    except Exception:
        pass
    return ""
667
+
668
+
669
def get_top_search_topics(project_name, platform="reddit", limit=8, window_days=30):
    """Return the text block printed by scripts/top_search_topics.py.

    The script is invoked with --project, --platform, --window-days and
    --limit. Returns its stripped stdout on exit code 0, otherwise ""
    (non-zero exit, 15s timeout or any other failure).
    """
    try:
        script = os.path.join(REPO_DIR, "scripts", "top_search_topics.py")
        argv = [
            PYTHON, script,
            "--project", project_name, "--platform", platform,
            "--window-days", str(window_days), "--limit", str(limit),
        ]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode == 0:
            return proc.stdout.strip()
    except Exception:
        pass
    return ""
684
+
685
+
686
def get_omitted_reddit_topics(project_name, limit=10, window_hours=168, min_omits=2):
    """Return the output of scripts/top_omitted_reddit_topics.py as a string.

    Per the original notes, the output is a JSON list of search_topic
    seeds whose threads pass the ripen gate but are repeatedly OMITTED by
    the draft-time SELECTION GATE. `min_omits` (default 2) is forwarded as
    --min-omits to suppress one-off omits.

    Returns the script's stripped stdout on exit code 0, otherwise ""
    (non-zero exit, 15s timeout or any other failure).
    """
    try:
        script = os.path.join(REPO_DIR, "scripts", "top_omitted_reddit_topics.py")
        argv = [
            PYTHON, script,
            "--project", project_name,
            "--window-hours", str(window_hours),
            "--limit", str(limit),
            "--min-omits", str(min_omits),
        ]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode == 0:
            return proc.stdout.strip()
    except Exception:
        pass
    return ""
710
+
711
+
712
def get_dud_reddit_queries(project_name, limit=15, window_hours=168):
    """Return the output of scripts/top_dud_reddit_queries.py as a string.

    Per the original notes, the output is a JSON list of recent dud Reddit
    queries for the project, used as an anti-list in the scanner prompt.
    The default window is 168 hours (7 days).

    Returns the script's stripped stdout on exit code 0, otherwise ""
    (non-zero exit, 15s timeout or any other failure).
    """
    try:
        script = os.path.join(REPO_DIR, "scripts", "top_dud_reddit_queries.py")
        argv = [
            PYTHON, script,
            "--project", project_name,
            "--window-hours", str(window_hours),
            "--limit", str(limit),
        ]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode == 0:
            return proc.stdout.strip()
    except Exception:
        pass
    return ""
734
+
735
+
736
+ def _recent_comment_text(item):
737
+ """Accept either str (legacy shape) or (id, content) tuple (2026-05-12
738
+ shape) and return the content string. Lets all three prompt builders
739
+ consume recent_comments without caring which shape they got. If
740
+ you're refactoring the upstream shape again, update this one place."""
741
+ if isinstance(item, (list, tuple)) and len(item) >= 2:
742
+ return item[1] or ""
743
+ return item or ""
744
+
745
+
746
+ def _strip_active_suffixes(text, active_campaigns):
747
+ """Remove any active-campaign suffix from `text` (idempotent, trailing-only).
748
+
749
+ Mirrors engage_reddit.strip_active_suffixes (commit 8cdde18) so we have
750
+ the same protection for the post_reddit drafting path. Without this,
751
+ `get_recent_comments()` feeds the LLM prior `posts.our_content` rows
752
+ that already end in the campaign suffix (e.g. " written with s4lai"),
753
+ the LLM copies the literal suffix into its draft because it looks like
754
+ part of our voice, and the tool-level append at line ~2092 stacks a
755
+ SECOND suffix on top. Observed in production 2026-05-18 on Deep_Ad1959
756
+ (reply rows 70412 + 70413) via engage_reddit; same risk exists here.
757
+
758
+ Strips trailing suffix repeatedly so a historically-doubled row also
759
+ collapses to clean text. Active campaign list is passed in by the
760
+ caller so we only strip patterns we're actively using (avoids
761
+ unbounded false-positive matches on incidental phrasing).
762
+ """
763
+ if not text or not active_campaigns:
764
+ return text
765
+ cleaned = text.rstrip()
766
+ changed = True
767
+ while changed:
768
+ changed = False
769
+ for camp in active_campaigns:
770
+ suffix = (camp.get("suffix") or "").strip()
771
+ if suffix and cleaned.endswith(suffix):
772
+ cleaned = cleaned[: -len(suffix)].rstrip()
773
+ changed = True
774
+ return cleaned
775
+
776
+
777
def get_recent_comments(limit=5):
    """Fetch recent Reddit posts as (id, content) tuples via /api/v1/posts.

    Rows without `our_content` or without an id are dropped. The ids let
    a later reader join back to the source posts; the content feeds the
    prompt builders.

    2026-05-18: active-campaign suffixes are stripped from each content
    before returning, so exemplars shown to the LLM never carry the
    campaign tag. Rows left empty by stripping are dropped. If loading the
    active campaigns fails, a warning goes to stderr and the unsanitized
    list is returned.

    Args:
        limit: number of posts requested from the API.

    Returns:
        List of (int id, str content) tuples.
    """
    resp = api_get(
        "/api/v1/posts",
        query={"platform": "reddit", "limit": int(limit)},
    )
    rows = ((resp or {}).get("data") or {}).get("posts") or []

    raw = []
    for row in rows:
        body = row.get("our_content")
        if not body or row.get("id") is None:
            continue
        raw.append((int(row["id"]), body))

    try:
        active_camps = load_active_reddit_campaigns()
    except Exception as e:
        # Degraded API call: unsanitized exemplars beat crashing the pipeline.
        print(f"[post_reddit] WARNING: load_active_reddit_campaigns failed "
              f"during recent_comments sanitize ({e}); returning raw content",
              file=sys.stderr)
        return raw

    sanitized = (
        (pid, _strip_active_suffixes(content, active_camps))
        for pid, content in raw
    )
    return [(pid, text) for pid, text in sanitized if text]
819
+
820
+
821
def load_active_reddit_campaigns():
    """Fetch active Reddit campaigns that carry a literal suffix.

    Calls /api/v1/campaigns with status=active, platform=reddit,
    has_suffix=true, with_budget_remaining=true and limit=500.

    Per the original notes, the suffix is appended to drafted text in
    Python before posting (the LLM never sees it), and sample_rate gates a
    per-post coin flip for concurrent A/B (e.g. 0.5 = ~half of posts).

    Returns:
        List of dicts with keys id (int), suffix (as returned by the API)
        and sample_rate (float; 1.0 when the API value is null or absent).
    """
    query = {
        "status": "active",
        "platform": "reddit",
        "has_suffix": "true",
        "with_budget_remaining": "true",
        "limit": 500,
    }
    resp = api_get("/api/v1/campaigns", query=query)
    rows = ((resp or {}).get("data") or {}).get("campaigns") or []

    campaigns = []
    for row in rows:
        rate = row.get("sample_rate")
        campaigns.append({
            "id": int(row["id"]),
            "suffix": row.get("suffix"),
            "sample_rate": float(1.0 if rate is None else rate),
        })
    return campaigns
850
+
851
+
852
+ def _angle_str(v):
853
+ if isinstance(v, str):
854
+ return v.strip()
855
+ if isinstance(v, dict):
856
+ return "; ".join(f"{k}: {_angle_str(x)}" for k, x in v.items() if x)
857
+ if isinstance(v, (list, tuple)):
858
+ return ", ".join(_angle_str(x) for x in v if x)
859
+ return str(v) if v else ""
860
+
861
+
862
+ def build_content_angle(project, config):
863
+ """Prefer project-specific positioning over the global config angle.
864
+
865
+ Always appends the project's audience-pages block (when configured) so the
866
+ draft prompt knows which curated landing pages it should link to for
867
+ topic-matched threads. Single source of truth flows through every caller
868
+ that consumes content_angle.
869
+ """
870
+ if project.get("content_angle"):
871
+ base = project["content_angle"]
872
+ else:
873
+ parts = []
874
+ for key in ("description", "differentiator", "icp", "setup"):
875
+ s = _angle_str(project.get(key))
876
+ if s:
877
+ parts.append(s)
878
+
879
+ messaging = project.get("messaging", {}) or {}
880
+ for key in ("lead_with_pain", "solution", "proof"):
881
+ s = _angle_str(messaging.get(key))
882
+ if s:
883
+ parts.append(s)
884
+
885
+ voice = project.get("voice", {}) or {}
886
+ if voice.get("tone"):
887
+ parts.append(f"Voice: {voice['tone']}")
888
+ if voice.get("never"):
889
+ parts.append("Never: " + "; ".join(voice["never"]))
890
+ examples = voice.get("examples") or voice.get("examples_good") or []
891
+ if examples:
892
+ parts.append("Voice examples: " + " | ".join(examples[:3]))
893
+
894
+ base = " ".join(parts) if parts else config.get("content_angle", "")
895
+
896
+ try:
897
+ ap_block = _audience_prompt_block(project.get("name") or "")
898
+ except Exception:
899
+ ap_block = ""
900
+ if ap_block:
901
+ return (base + "\n\n" + ap_block).strip() if base else ap_block.strip()
902
+ return base
903
+
904
+
905
def build_discover_prompt(project, config, limit, top_report, recent_comments,
                          top_topics_report="", dud_queries_report="",
                          omitted_topics_report=""):
    """Build the DISCOVER-phase prompt (scan only, no drafting).

    The prompt tells the model to choose and run Reddit search queries in
    OPAQUE mode and then print DONE. It does not ask the model to fetch,
    judge or draft anything.

    Args:
        project: project config dict (name, description, angle fields).
        config: global config, forwarded to build_content_angle.
        limit: accepted for signature compatibility; not used in the body.
        top_report: performance feedback text; only its first 20 lines
            are included.
        recent_comments: recent comments as str or (id, content) items.
        top_topics_report: text block of top-performing search topics.
        dud_queries_report: dead-query list; omitted when blank or "[]".
        omitted_topics_report: category-mismatch seeds; omitted when blank
            or "[]".

    Returns:
        The full prompt string.
    """
    content_angle = build_content_angle(project, config)
    topics_list = list(topics_for_project(project.get("name") or ""))
    project_summary = {
        "name": project.get("name"),
        "description": project.get("description"),
        "search_topics": topics_list,
    }
    project_json = json.dumps(project_summary, indent=2)

    recent_ctx = ""
    if recent_comments:
        # _recent_comment_text accepts both the legacy str shape and the
        # current (id, content) shape.
        texts = (_recent_comment_text(c) for c in recent_comments)
        snippets = "\n".join(f" - {t}" for t in texts if t)
        recent_ctx = f"\nYour last {len(recent_comments)} comments (don't repeat these threads):\n{snippets}\n"

    top_ctx = ""
    if top_report:
        head = "\n".join(top_report.split("\n")[:20])
        top_ctx = f"\n## Past performance feedback:\n{head}\n"

    top_topics_ctx = ""
    if top_topics_report:
        topics_header = (
            "\n## Past top-performing search topics "
            "(sorted by clicks DESC first, then composite-scored: "
            "clicks*100 + comments + upvotes). "
            "CLICKS ARE THE PRIORITY SIGNAL. Any topic with `clicks > 0` is "
            "GOLD TIER, clicks are the only metric that proves our reply drove "
            "someone to actually visit the project's link. Comments and upvotes "
            "are vanity. If a project in your draft set has a gold-tier topic "
            "in this list, mimic ITS framing (subreddit fit, keyword cluster, "
            "specificity) FIRST before falling back to other styles. The "
            "Δpost / Δskip columns also matter: high Δskip + few posts = the "
            "topic surfaces alive but off-topic threads (reword more narrowly); "
            "low Δskip + few posts = dead supply (drop the topic). Optimize the "
            "entire pipeline for clicks; everything else is leading indicators.\n"
        )
        top_topics_ctx = topics_header + f"{top_topics_report}\n"

    empty_reports = ("[]", "")

    dud_queries_ctx = ""
    if dud_queries_report and dud_queries_report.strip() not in empty_reports:
        dud_queries_ctx = f"\n## Dead queries (skip these exact phrasings):\n{dud_queries_report}\n"

    omitted_topics_ctx = ""
    if omitted_topics_report and omitted_topics_report.strip() not in empty_reports:
        omitted_header = (
            "\n## Category-mismatch seeds (returned alive threads but the draft "
            "SELECTION GATE killed them — i.e. this seed surfaces wrong-audience "
            "subs; rephrase MORE NARROWLY around your project's actual domain, "
            "or drop the seed entirely):\n"
        )
        omitted_topics_ctx = omitted_header + f"{omitted_topics_report}\n"

    search_cap = MAX_DISCOVER_SEARCHES
    fewest = min(2, search_cap)
    most = search_cap

    return f"""You generate Reddit search queries. The search tool runs in OPAQUE mode this cycle: it dumps every returned thread to a side file for the ripen pipeline and prints back ONLY a one-line summary count. You do NOT see thread content, titles, scores, or URLs. You cannot filter results — the ripen step (numerical delta gate) is the only filter.

Topic area: {project_json}
Content angle: {content_angle}
{recent_ctx}{top_ctx}{top_topics_ctx}{omitted_topics_ctx}{dud_queries_ctx}
## Tool (via Bash)
- Search: python3 {REDDIT_TOOLS} search "QUERY" --limit 25
- Search by sub: python3 {REDDIT_TOOLS} search "QUERY" --subreddits AI_Agents,SaaS --time month
- Search broader time: python3 {REDDIT_TOOLS} search "QUERY" --time month

## What you'll see from the tool
- stdout: one short line, e.g. `OK: 23 threads passed to ripen pipeline (results not shown)`
- stderr: `[reddit_search] q="..." raw=25 returned=23 blocked_sub=2 archived=0 locked=0 too_old=0 already_posted_flagged=0 top_score=187 top_comments=48`

You can use these counts to decide whether to run another query. You CANNOT
read the threads themselves. They are already on disk for ripen.

## CRITICAL Bash rules
- NEVER use run_in_background=true. All commands run foreground.
- Run AT MOST {search_cap} searches total. Each search dumps up to 25 threads.
- Do NOT cat, ls, find, or otherwise inspect /tmp or any dump file. The dump
directory is private to the ripen step. You don't need to know the path.
- If rate-limited, stop. The ripen step uses whatever was dumped before the limit.

## Steps
1. Pick {fewest}-{most} concepts from the project's search_topics: {json.dumps(topics_list)}.
Rephrase each into a natural Reddit search query (vernacular, pain points).
Avoid the dud queries listed above. If a seed appears in the
"Category-mismatch seeds" section above, EITHER rephrase it MUCH more
narrowly (constrain to your project's exact audience/subreddit) OR skip
it and pick a different seed.
2. Run the searches. Watch the stdout/stderr summary for each call. Prefer
covering DIFFERENT angles across queries (e.g. don't run 5 near-duplicate
rephrasings of one seed).
3. (Optional) If a query returns `returned=0`, you may try ONE more rephrasing.
You may also stop early at {fewest} if your queries returned plenty of
results — quality > quota. Never exceed {search_cap} total.
4. Output DONE on its own line.

## OUTPUT FORMAT
Just output `DONE` on its own line after running your searches. No JSON,
no candidate lines, no commentary about thread content (you don't see any).
"""
1021
+
1022
+
1023
+ def build_draft_prompt(project, config, candidates, top_report, recent_comments,
1024
+ style_assignment=None):
1025
+ """DRAFT phase: write comments only for ripen-survivors.
1026
+
1027
+ `candidates` is the list of decisions that passed the delta gate, each
1028
+ annotated with ripen data (delta_up, delta_comments, composite). Claude
1029
+ fetches each thread, reads context, then writes the best comment.
1030
+
1031
+ 2026-05-19: `style_assignment` is the pick_style_for_post() result the
1032
+ discover phase already wrote into the plan JSON. Forwarding it here so
1033
+ the draft phase enforces the SAME style instead of letting the model
1034
+ free-pick (and overwhelmingly default to pattern_recognizer). When
1035
+ omitted, get_styles_prompt() picks fresh internally (legacy callers).
1036
+ """
1037
+ content_angle = build_content_angle(project, config)
1038
+
1039
+ recent_ctx = ""
1040
+ if recent_comments:
1041
+ # _recent_comment_text handles both legacy str and current (id, content) shapes.
1042
+ snippets = "\n".join(
1043
+ f" - {_recent_comment_text(c)}"
1044
+ for c in recent_comments
1045
+ if _recent_comment_text(c)
1046
+ )
1047
+ recent_ctx = f"\nYour last {len(recent_comments)} comments (don't repeat talking points):\n{snippets}\n"
1048
+
1049
+ top_ctx = ""
1050
+ if top_report:
1051
+ lines = top_report.split("\n")[:20]
1052
+ top_ctx = f"\n## Past performance feedback:\n{chr(10).join(lines)}\n"
1053
+
1054
+ candidate_lines = []
1055
+ for c in candidates:
1056
+ rip = c.get("ripen") or {}
1057
+ delta_info = ""
1058
+ if rip.get("composite") is not None:
1059
+ delta_info = (f" [active: Δup={rip.get('delta_up', 0)},"
1060
+ f" Δcomm={rip.get('delta_comments', 0)},"
1061
+ f" composite={rip.get('composite', 0):.1f} over"
1062
+ f" {rip.get('window_sec', 300)}s]")
1063
+ history_line = ""
1064
+ try:
1065
+ _hb = _render_author_history(
1066
+ "reddit", c.get("thread_author") or "", days=30, limit=5
1067
+ )
1068
+ if _hb:
1069
+ history_line = "\n " + _hb.replace("\n", "\n ")
1070
+ except Exception:
1071
+ pass
1072
+ candidate_lines.append(
1073
+ f" - {c['thread_url']}{delta_info}\n"
1074
+ f" title: {c.get('thread_title', '')}\n"
1075
+ f" suggested style: {c.get('engagement_style', '')}"
1076
+ f"{history_line}"
1077
+ )
1078
+ candidates_block = "\n".join(candidate_lines)
1079
+
1080
+ return f"""You will be handed up to {len(candidates)} Reddit thread(s) that survived the engagement-velocity (ripen) gate. Your job is to draft comments for the ones where you can write something genuinely useful to that audience. Lean toward DRAFTING when the audience overlaps even partially with the project's user, and only OMIT on clear no-bridge cases.
1081
+
1082
+ Content angle: {content_angle}
1083
+ {recent_ctx}{top_ctx}
1084
+ ## Candidate threads (post-ripen):
1085
+ {candidates_block}
1086
+
1087
+ ## SELECTION GATE — soft fits are OK; reject only clear mismatches
1088
+
1089
+ The ripen step proves a thread is alive (people are voting/commenting). It does NOT prove the thread fits the project. Reddit search returns false positives based on raw token overlap (e.g. a search for "no-code app maker" surfaces r/gamemaker shader threads because of the word "maker"; a search for "E2E testing developer productivity QA" can surface a JonBenet murder thread because of how Reddit indexes acronyms). The gate exists to catch those token-overlap false positives, NOT to demand a perfect product fit on every thread.
1090
+
1091
+ For each thread, ask the **bridge test**:
1092
+ "Could a thoughtful person from {project.get('name', 'this project')}'s audience plausibly read my comment and find it useful, regardless of whether they ever try the product?"
1093
+
1094
+ DRAFT it if YES. OMIT only if NO bridge exists at all (clear off-topic / hostile audience / token-overlap false positive). Soft / partial / adjacent fits are GOOD enough — a useful comment in an adjacent sub builds reputation even when no one converts. Don't optimize for purity. Don't artificially cap output. The post-phase will cap actual posting at a reasonable number, so feel free to draft for any thread that passes the soft bridge test.
1095
+
1096
+ DRAFT THESE (broad, inclusive — not just direct hits):
1097
+ - Project: AI test automation (Assrt). Thread: "Playwright selectors keep breaking on every refactor" → direct fit. DRAFT.
1098
+ - Project: AI test automation. Thread: r/QualityAssurance "How are people handling flaky CI tests?" → adjacent topic, same audience. DRAFT.
1099
+ - Project: AI app builder (mk0r). Thread: "I want to prototype a tip calculator without learning React" → direct fit. DRAFT.
1100
+ - Project: AI app builder. Thread: r/SaaS "Indie hackers shipping MVPs in a weekend" → adjacent: same builder mindset. DRAFT (helpful comment about iteration speed).
1101
+ - Project: study tool (Studyly). Thread: r/medschool "best way to handle 200-slide lectures" → direct fit. DRAFT.
1102
+ - Project: study tool. Thread: r/GetStudying "I'm burnt out, can't retain anything" → adjacent: study-habit audience. DRAFT (empathetic comment about active recall, even if no product mention).
1103
+ - Project: home security camera (Cyrano). Thread: r/HomeImprovement "wired vs wireless cameras" → direct fit. DRAFT.
1104
+
1105
+ OMIT THESE (clear no-bridge cases only):
1106
+ - Project: AI test automation. Thread: r/JonBenet "The Absurdity of the BDI Theory" → token-overlap false positive (BDI ≠ a testing acronym here). 1996 murder case audience. NO bridge. OMIT.
1107
+ - Project: AI app builder. Thread: r/BostonSocialClub "Events worth leaving the house for this weekend" → matched on "tried"/"maker". Locals planning weekends. NO bridge. OMIT.
1108
+ - Project: AI app builder. Thread: r/gamemaker "Using surfaces to create paper-like behavior" → GameMaker is a code IDE, not a no-code generator. Audience writes GML shaders. NO bridge. OMIT.
1109
+ - Project: study tool. Thread: r/SubredditDrama "the alternative option is still running" → meta drama, no study angle. OMIT.
1110
+ - Project: study tool. Thread: r/trichotillomania "the trich trance" → medical condition, not studying. OMIT.
1111
+ - Project: study tool. Thread where you've ALREADY commented under any of our accounts (`already_posted=true` or our usernames in the comment list): obvious astroturfing. OMIT.
1112
+ - Any thread where you'd be embarrassed to have your comment shown next to a {project.get('name', 'this project')} link in the same Reddit thread.
1113
+
1114
+ ## Tools (via Bash)
1115
+ - Fetch thread: python3 {REDDIT_TOOLS} fetch "THREAD_URL"
1116
+ - Do NOT run any searches. The threads are already chosen.
1117
+
1118
+ ## CRITICAL Bash rules
1119
+ - NEVER use run_in_background=true. All commands foreground.
1120
+ - Fetch each thread once to read OP and top comments for context. Use the fetched content to apply the SELECTION GATE.
1121
+
1122
+ ## CRITICAL CONTENT RULES (apply only to threads that pass the gate)
1123
+ - Go BIMODAL on length: 1 punchy sentence (<100 chars) OR 4-5 sentences of real substance. Avoid 2-3 sentence middle-ground.
1124
+ - GROUNDING RULE — pick ONE lane per comment:
1125
+ LANE 1 - DISCLOSED STORY: open with a hedge ("hypothetically", "imagine someone running this", "scenario:") then you may invent specifics freely.
1126
+ LANE 2 - NO FABRICATION: every specific (numbers, durations, places, tools) must appear verbatim in the content_angle above. Otherwise drop the specific and pattern-frame ("the part that breaks down is...", "the typical failure mode is...").
1127
+ - VOICE RELATIONSHIP: see the dedicated section below; it governs whether you speak AS the maker or as an outside observer.
1128
+ - NEVER mention product names (fazm, assrt, pieline, cyrano, terminator, mk0r, s4l).
1129
+ - NEVER include URLs or links in your comment text.
1130
+ - Prefer replying to OP (top-level reply). ONE comment per thread.
1131
+ - Statements beat questions. Be authoritative, not inquisitive.
1132
+
1133
+ ## Content rules
1134
+ {get_content_rules("reddit")}
1135
+
1136
+ {get_styles_prompt("reddit", context="posting", assignment=style_assignment)}
1137
+
1138
+ {get_voice_relationship_rule()}
1139
+
1140
+ ## OUTPUT FORMAT
1141
+ For each thread that PASSES the SELECTION GATE, output one JSON object per line:
1142
+ {{"action": "post", "thread_url": "SAME_URL_AS_GIVEN", "reply_to_url": null, "text": "your comment here", "thread_author": "username", "thread_title": "thread title", "engagement_style": "{(style_assignment or {}).get('style') or 'style_name'}", "search_topic": "the seed concept", "new_style": null}}
1143
+
1144
+ For threads that FAIL the gate, simply omit the post JSON above. The shell handles unhandled candidates correctly (Phase 0 salvage on the next cycle re-checks them, and one-strike ripen failure has already pruned dead threads).
1145
+
1146
+ ## OPTIONAL: proposed_excludes (self-improving denylist)
1147
+ When you OMIT a thread because of a recurring CLASS of false-positive (the SUB itself surfaces wrong-audience threads, not just this one thread), you MAY emit a second JSON line for that thread:
1148
+
1149
+ {{"action": "reject", "thread_url": "SAME_URL_AS_GIVEN", "reason": "short reason", "proposed_excludes": ["subreddit:bestofredditorupdates"]}}
1150
+
1151
+ Rules:
1152
+ - proposed_excludes entries MUST use the typed form `subreddit:<slug>` (lowercase, no `r/` prefix). Future shape: `keyword:<word>` is accepted but unused today.
1153
+ - DO emit when: the false-positive is structural — e.g. r/bestofredditorupdates is family drama matching on the word "alternative"; r/hfy is sci-fi narrative matching on the word "spaced"; r/superstonk is GME meme stock matching on "anki" via a random comment. The SUB is the false positive, not just this one post.
1154
+ - DO NOT emit when: this specific thread is bad but the sub is fine in general (e.g. r/{project.get('name', 'project')}'s natural audience like r/medicalschool, r/anki, r/getstudying — never propose excluding a top-performing sub).
1155
+ - Activation gate: a term needs >=2 SEPARATE batches to propose it before it goes live on future Reddit searches. A single mistaken proposal cannot mute a sub. Propose if a thoughtful future cycle would likely agree; otherwise omit.
1156
+ - 1-3 entries per reject is plenty. When in doubt, omit the field. Default (no reject line) is safe.
1157
+
1158
+ Examples of GOOD proposals:
1159
+ - Reject r/bestofredditorupdates "Husband lied" → ["subreddit:bestofredditorupdates"]
1160
+ - Reject r/hfy "The Trial of Humanity" → ["subreddit:hfy"]
1161
+ - Reject r/battlefield6 "GAME UPDATE 1.3.1.0" → ["subreddit:battlefield6"]
1162
+ - Reject r/superstonk "GMERICA acquisition" → ["subreddit:superstonk"]
1163
+ - Reject r/nosleep "cursed doll" → ["subreddit:nosleep"]
1164
+
1165
+ Examples of WRONG proposals (do not emit):
1166
+ - Reject a specific r/nursing thread because OP is venting → DO NOT exclude r/nursing (it's our target audience; just omit this thread)
1167
+ - Reject one r/anki thread that's off-topic → DO NOT exclude r/anki (core ICP)
1168
+
1169
+ Output DONE after all JSONs (both post and reject lines, in any order). Do NOT narrate. Fetch, gate, draft-or-reject, output JSONs, DONE.
1170
+ """
1171
+
1172
+
1173
def parse_candidates(output):
    """Collect the `"action": "candidate"` JSON objects found in *output*.

    Only flat objects (no nested braces) are recognised. Objects that fail
    to parse as JSON, or that lack a truthy `thread_url`, are skipped. When
    the same `thread_url` appears more than once, the first occurrence wins
    and the original ordering is preserved.
    """
    candidate_re = re.compile(r'\{[^{}]*?"action"\s*:\s*"candidate"[^{}]*?\}')
    by_url = {}
    for blob in candidate_re.findall(output):
        try:
            parsed = json.loads(blob)
            link = parsed.get("thread_url", "")
            if not link or link in by_url:
                continue
            by_url[link] = parsed
        except (json.JSONDecodeError, TypeError):
            # Malformed JSON, or an unhashable thread_url: ignore this blob.
            continue
    return list(by_url.values())
1187
+
1188
+
1189
def build_prompt(project, config, limit, top_report, recent_comments,
                 top_topics_report="", dud_queries_report=""):
    """Build prompt for Claude to search, evaluate, and draft replies (no posting).

    The prompt is assembled from optional context sections (recent comments,
    past-performance feedback, top search topics, dead queries) followed by
    the fixed tool, content, and output-format instructions.

    Args:
        project: mapping read via `.get("name")` and `.get("description")`.
        config: passed through to `build_content_angle` together with `project`.
        limit: number of threads requested; interpolated into the prompt text.
        top_report: feedback report text; only its first 30 lines are included.
            Falsy values omit the section.
        recent_comments: iterable of recent comments; each item is rendered
            via `_recent_comment_text`. Falsy values omit the section.
        top_topics_report: text block of past top-performing search topics.
            Falsy values omit the section.
        dud_queries_report: a JSON list of recent zero-result queries for this
            project (see get_dud_reddit_queries). When non-empty, an anti-list
            block is inserted alongside the positive top_topics_report so the
            LLM is steered away from phrasings that have already proven flat
            in the last 7 days.

    Returns:
        The complete prompt as a single string.
    """
    content_angle = build_content_angle(project, config)

    # DB-backed search_topics (post 2026-05-27 config.json removal).
    topics_list = list(topics_for_project(project.get("name") or ""))

    # Compact project summary embedded verbatim under "Topic area:".
    project_json = json.dumps({
        "name": project.get("name"),
        "description": project.get("description"),
        "search_topics": topics_list,
    }, indent=2)

    recent_ctx = ""
    if recent_comments:
        # _recent_comment_text handles both legacy str and current (id, content) shapes.
        # NOTE(review): the header below counts len(recent_comments), while
        # entries whose text is empty are filtered out of `snippets`, so the
        # stated count can exceed the number of listed comments.
        snippets = "\n".join(
            f" - {_recent_comment_text(c)}"
            for c in recent_comments
            if _recent_comment_text(c)
        )
        recent_ctx = f"""
Your last {len(recent_comments)} comments (don't repeat talking points):
{snippets}
"""

    top_ctx = ""
    if top_report:
        # Cap the feedback section at the first 30 lines of the report.
        lines = top_report.split("\n")[:30]
        top_ctx = f"""
## Feedback from past performance:
{chr(10).join(lines)}
"""

    top_topics_ctx = ""
    if top_topics_report:
        top_topics_ctx = f"""
## Past top-performing search topics (sorted by clicks DESC first, then composite-scored: clicks*100 + comments + upvotes)
CLICKS ARE THE PRIORITY SIGNAL. Any topic with `clicks > 0` is GOLD TIER, clicks
are the only metric that proves our reply drove someone to actually visit the
project's link. Comments and upvotes are vanity. If a project in your draft set
has a gold-tier topic in this list, mimic ITS framing (subreddit fit, keyword
cluster, specificity) FIRST before falling back to other styles. The Δpost /
Δskip columns also matter: high Δskip + few posts = topic surfaces alive but
off-topic threads (reword more narrowly); low Δskip + few posts = dead supply
(drop the topic). Optimize the entire pipeline for clicks; everything else is
leading indicators.

{top_topics_report}

If none of the top topics match this run's angle, pick any seed from the
project's search_topics list. New topics with 0 clicks are fine — we still need
to explore — but a gold-tier topic that fits should beat any unproven topic.
"""

    # NEGATIVE-signal feedback: queries that have produced zero post-filter
    # candidates in the last 7 days. Mirrors twitter_search_attempts /
    # top_dud_twitter_queries.py but speaks in terms of (query, subreddits)
    # since Reddit search is sub-scoped. Keep this list short — Reddit is
    # more keyword-rigid than Twitter, so even "the same phrase but in a
    # different sub" can still produce results.
    dud_queries_ctx = ""
    # An empty string or a serialized empty list ("[]") both mean "no duds".
    if dud_queries_report and dud_queries_report.strip() not in ("[]", ""):
        dud_queries_ctx = f"""
## Dead queries (DO NOT redraft these — flat for the last 7 days):
{dud_queries_report}

Each entry is a (query, subreddits) phrasing that has returned ZERO usable
threads on every recent attempt. Pick fresh wording, a different angle, or a
different subreddit slate. Reusing an exact dead phrasing wastes a search
slot and burns rate-limit budget for no upside.
"""

    # NOTE(review): `style_assignment` (used in the get_styles_prompt call
    # inside the f-string below) is neither a parameter nor a local of this
    # function — confirm it is defined at module scope, otherwise this
    # return raises NameError.
    return f"""Find {limit} Reddit threads where you can add genuine value as someone with expertise in {project.get('name', 'general')}.

Topic area: {project_json}
Content angle: {content_angle}

Your role: You are a knowledgeable observer of this space. Your comments should be indistinguishable
from any other knowledgeable person in the conversation. NEVER mention product names, links, websites,
or GitHub repos. Product mentions happen ONLY later in the reply pipeline when people respond to you.

CRITICAL: every comment picks ONE of two lanes (see the GROUNDING RULE below).
LANE 1 - DISCLOSED STORY: open with a hedge ("hypothetically", "imagine someone running this",
"scenario:", "say a friend tried") and you may then invent any specifics.
LANE 2 - NO FABRICATION: every specific (numbers, durations, places, course names, headcount,
named tools) must appear verbatim in this project's content_angle / voice / messaging in
config.json. Otherwise drop the specific or pattern-frame ("the part that breaks down is...",
"the typical failure mode is...").
Never present an invented specific as a personal first-hand claim without a Lane 1 opener.
VOICE RELATIONSHIP: see the dedicated section below; it governs whether you speak AS the maker or as an outside observer.
{recent_ctx}{top_ctx}{top_topics_ctx}{dud_queries_ctx}
{get_styles_prompt("reddit", context="posting", assignment=style_assignment)}

{get_voice_relationship_rule()}

## Tools (via Bash) - ALWAYS foreground, NEVER run_in_background
- Search (global, by relevance): python3 {REDDIT_TOOLS} search "QUERY" --limit 15
- Search (scoped to specific subs): python3 {REDDIT_TOOLS} search "QUERY" --subreddits AI_Agents,SaaS,smallbusiness --time month
- Search (broader time range): python3 {REDDIT_TOOLS} search "QUERY" --time month
- Fetch thread: python3 {REDDIT_TOOLS} fetch "THREAD_URL"
- Check dedup: python3 {REDDIT_TOOLS} already-posted "THREAD_URL"

Search defaults to sort=relevance and time=week. Use --time month for broader results. Use --subreddits for targeted sub searches.

## Delta gating (new 2026-05-05)
Each thread in the search JSON now carries delta fields populated from a
persistent reddit_thread_snapshots table:
- sightings: how many search cycles have surfaced this exact thread
- delta_score: upvote change since first_seen_at
- delta_comments: comment change since first_seen_at
- delta_window_min: minutes between first_seen_at and now
- first_seen_at: when we first saw this thread

Use these to PREFER threads that are still picking up momentum since we last
saw them (positive delta_score with recent activity) over stale threads that
peaked hours ago. A thread with sightings>=2 and delta_score<=0 over 60+ min
is going cold; skip it for a fresher candidate.

## CRITICAL Bash rules
- NEVER use run_in_background=true. All bash commands must run foreground and return quickly (under 20s each).
- NEVER use `sleep` commands. NEVER run `sleep N && cat ...` to wait for background tasks.
- NEVER pipe multiple searches with `&` or `&&`. Run ONE search command at a time, wait for output, then decide next step.
- If you see `{{"error": "rate_limited", ...}}` in the output, DO NOT retry that command. Skip it and move on.
Rate limits are global. Waiting won't help this session. Use whatever search results you already have.
- If you can't find enough threads after 5 search attempts total, draft fewer posts (even 1-2 is fine) rather than searching more.

## CRITICAL CONTENT RULES
- Study the style performance data in the feedback report below. Pick styles with the highest avg upvotes.
- Go BIMODAL on length: either 1 punchy sentence (<100 chars) or 4-5 sentences of real substance. AVOID the 2-3 sentence middle.
- GROUNDING has TWO valid forms. Lane 1: open with a disclosure phrase ("hypothetically", "imagine someone running this", "scenario:") and then invent freely. Lane 2: every specific (numbers/places/programs) must be grounded in content_angle/voice/messaging in config.json, or drop the specific and pattern-frame ("the part that breaks down is...", "the typical failure mode is..."). Never present an invented specific as a personal first-hand claim without a Lane 1 opener.
- VOICE: see the VOICE RELATIONSHIP section below; it governs whether you speak AS the maker or as an outside observer based on the matched project's voice_relationship field.
- NEVER mention product names (fazm, assrt, pieline, cyrano, terminator, mk0r, s4l).
- NEVER include URLs or links.
- Prefer replying to OP (top-level reply).
- ONE comment per thread.
- Statements beat questions. Be authoritative, not inquisitive.

## Steps
1. Pick 2 concepts from the project's search_topics list: {json.dumps(topics_list)}.
These are shared concept seeds across platforms (Twitter, Reddit, GitHub, LinkedIn). Some
phrases are tuned for other platforms — rephrase each into natural Reddit search terms
(vernacular, problem-framing, pain points) before running the search. Skip already_posted=true threads.
2. Pick {limit} best threads where you have genuine expertise to contribute. Prefer replying to OP. Fetch each one.
3. Draft the comment following the CRITICAL CONTENT RULES above. Quality over quantity.
4. Output each as a JSON object, then DONE. Include the seed concept you used in "search_topic".

## Content rules
{get_content_rules("reddit")}

## CRITICAL OUTPUT FORMAT
You MUST output each draft as a raw JSON object on its own line. No commentary before or after. Example:

{{"action": "post", "thread_url": "https://old.reddit.com/r/sub/comments/abc/title/", "reply_to_url": null, "text": "your comment here", "thread_author": "username", "thread_title": "thread title", "engagement_style": "critic", "search_topic": "the seed concept you picked", "new_style": null}}

If, and ONLY if, none of the listed styles fits, you may invent one. Set "engagement_style" to your snake_case name AND replace `"new_style": null` with `{{"description": "...", "example": "...", "note": "...", "why_existing_didnt_fit": "..."}}`. Inventing should be rare; prefer an existing style if it's even 80% right.

After all {limit} JSON objects, output DONE on its own line.
Do NOT describe what you are doing. Do NOT narrate. Just search, draft, output JSON, DONE.
"""
1356
+
1357
+
1358
def run_claude(prompt, timeout=600):
    """Run `claude -p` with the Bash and Read tools (no MCP needed).

    Streams output in real time to stderr (picked up by tee in the shell wrapper)
    while collecting the full output for JSON parsing.

    Args:
        prompt: prompt text written to the child's stdin.
        timeout: overall wall-clock budget in seconds for the child process.

    Returns:
        A tuple `(ok, output, usage)`:
        - ok: True when the child exited with return code 0.
        - output: concatenated assistant/result text (or the raw collected
          stream when no text blocks were found) followed by the child's
          stderr; the literal "TIMEOUT" on timeout; the exception text on
          an unexpected failure.
        - usage: dict with token counts, `cost_usd`, and `session_id`.
    """
    import threading
    import time as _time
    usage = {"input_tokens": 0, "output_tokens": 0, "cache_read": 0, "cache_create": 0, "cost_usd": 0.0}
    session_id = str(uuid.uuid4())
    usage["session_id"] = session_id
    # Set in this process's env so subsequent log_post → reddit_tools.py inherits it
    os.environ["CLAUDE_SESSION_ID"] = session_id
    cmd = ["claude", "-p", "--session-id", session_id, "--output-format", "stream-json", "--verbose"]
    cmd += ["--tools", "Bash,Read"]
    env = os.environ.copy()
    env.pop("ANTHROPIC_API_KEY", None)  # ensure claude uses OAuth, not API key
    proc = None
    try:
        proc = subprocess.Popen(
            cmd, env=env, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
        )

        # Drain stderr concurrently. Reading it only after the child exits
        # lets a chatty child fill the OS pipe buffer and block forever
        # (until our timeout) while we are waiting on stdout.
        stderr_chunks = []

        def _drain_stderr():
            try:
                for chunk in proc.stderr:
                    stderr_chunks.append(chunk)
            except (OSError, ValueError):
                # Pipe closed underneath us (e.g. child killed); nothing to do.
                pass

        stderr_thread = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_thread.start()

        proc.stdin.write(prompt)
        proc.stdin.close()
        collected = []
        deadline = _time.time() + timeout
        import select
        while True:
            remaining = deadline - _time.time()
            if remaining <= 0:
                proc.kill()
                proc.wait()  # reap the child so it does not linger as a zombie
                return False, "TIMEOUT", usage
            # Wake at least every 30s so a heartbeat line can be logged.
            ready, _, _ = select.select([proc.stdout], [], [], min(remaining, 30))
            if ready:
                line = proc.stdout.readline()
                if not line:
                    break
                collected.append(line)
                # Stream meaningful events to stderr so tee/log captures them
                try:
                    evt = json.loads(line.strip())
                    etype = evt.get("type", "")
                    if etype == "assistant":
                        msg = evt.get("message", {})
                        for block in msg.get("content", []):
                            if block.get("type") == "tool_use":
                                print(f"[post_reddit] tool: {block.get('name','')} | {str(block.get('input',{}).get('command',''))[:120]}", file=sys.stderr, flush=True)
                            elif block.get("type") == "text" and block.get("text","").strip():
                                txt = block["text"].strip()[:200]
                                print(f"[post_reddit] {txt}", file=sys.stderr, flush=True)
                    elif etype == "user":
                        # Tool results land in user messages. reddit_tools.py
                        # search emits a `[reddit_search] q=... raw=N returned=R`
                        # line on its own stderr, which Claude Code's Bash tool
                        # bundles into the tool_result content. Forward those
                        # markers into our log so enrichPostCommentsRedditRuns
                        # can derive raw/passed pills per run.
                        msg = evt.get("message", {})
                        for block in msg.get("content", []):
                            if block.get("type") != "tool_result":
                                continue
                            content = block.get("content", "")
                            if isinstance(content, list):
                                content = "".join(c.get("text","") for c in content if isinstance(c, dict))
                            for ln in str(content).splitlines():
                                if ln.startswith("[reddit_search]"):
                                    print(ln, file=sys.stderr, flush=True)
                    elif etype == "result":
                        print(f"[post_reddit] done: cost=${evt.get('total_cost_usd',0):.4f}", file=sys.stderr, flush=True)
                except (json.JSONDecodeError, TypeError, AttributeError):
                    # Not a JSON object event (plain text, or JSON of an
                    # unexpected shape): echo the raw line instead of aborting.
                    print(f"[post_reddit] {line.rstrip()[:200]}", file=sys.stderr, flush=True)
            elif proc.poll() is not None:
                # Process ended, read remaining. Split into lines so each
                # trailing JSON event is parsed individually below.
                rest = proc.stdout.read()
                if rest:
                    collected.extend(rest.splitlines(keepends=True))
                break
            else:
                print(f"[post_reddit] ... still running ({int(_time.time() - (deadline - timeout))}s)", file=sys.stderr, flush=True)
        proc.wait()
        # Parse stream-json: collect ALL text blocks (not just the final result)
        # JSON post decisions can appear in any assistant message, not just the last one
        all_text_parts = []
        for line_str in collected:
            line_str = line_str.strip()
            if not line_str:
                continue
            try:
                event = json.loads(line_str)
                etype = event.get("type", "")
                if etype == "assistant":
                    for block in event.get("message", {}).get("content", []):
                        if block.get("type") == "text":
                            all_text_parts.append(block["text"])
                elif etype == "result":
                    if event.get("result"):
                        all_text_parts.append(event["result"])
                    usage["cost_usd"] = event.get("total_cost_usd", 0.0)
                    u = event.get("usage", {})
                    usage["input_tokens"] = u.get("input_tokens", 0)
                    usage["output_tokens"] = u.get("output_tokens", 0)
                    usage["cache_read"] = u.get("cache_read_input_tokens", 0)
                    usage["cache_create"] = u.get("cache_creation_input_tokens", 0)
            except (json.JSONDecodeError, TypeError, AttributeError):
                pass
        text_output = "\n".join(all_text_parts) if all_text_parts else "".join(collected)
        # The child has exited, so the drain thread finishes once it hits EOF.
        stderr_thread.join(timeout=5)
        stderr_out = "".join(stderr_chunks)
        try:
            log_args = [PYTHON, os.path.join(REPO_DIR, "scripts", "log_claude_session.py"),
                        "--session-id", session_id, "--script", "post_reddit"]
            orch_cost = usage.get("cost_usd")
            if isinstance(orch_cost, (int, float)) and orch_cost > 0:
                log_args.extend(["--orchestrator-cost-usd", str(orch_cost)])
            subprocess.run(log_args, capture_output=True, text=True, timeout=30)
        except Exception as e:
            print(f"[post_reddit] WARNING: log_claude_session failed: {e}", file=sys.stderr)
        return proc.returncode == 0, text_output + stderr_out, usage
    except Exception as e:
        # Do not leave an orphaned child behind when we bail out early.
        if proc is not None and proc.poll() is None:
            try:
                proc.kill()
                proc.wait()
            except Exception:
                pass
        return False, str(e), usage
1476
+
1477
+
1478
def _acquire_browser_lease(timeout: int = 600, ttl: int = 90):
    """Take the reddit-browser lease for the CDP work of a single row.

    The lease is acquired per post rather than per cycle or per phase (the
    migration shipped 2026-05-13). Previously run-reddit-search.sh held the
    lease around the whole `--phase post` invocation, so a 10-row salvage
    batch kept the browser for ~30 min (10 × ~45s post + 9 × 180s
    between-post sleep) while peers (link-edit-reddit, dm-outreach-reddit,
    engage-reddit, engage-dm-replies-reddit) were blocked. Acquiring and
    releasing per row means the lease is held only during the actual CDP
    posting work (~45s incl. retries); the 3-min between-post sleeps happen
    unlocked.

    No manual heartbeat is needed here: the MCP wrapper's auto-heartbeat
    (PreToolUse/PostToolUse hooks bumping `expires_at`) keeps the lease
    alive during real browser activity. The default 90s TTL leaves headroom
    for post_via_cdp's 5-attempt retry loop with its internal sleeps.

    Returns:
        (ok, msg). On success msg is the helper's last non-empty stdout
        line; on failure it is that line if present, else up to 200 chars
        of stderr, else `rc=<code>`, or a `subprocess_timeout` /
        `exception:<err>` marker.
    """
    try:
        argv = [
            PYTHON, REDDIT_BROWSER_LOCK, "acquire",
            "--timeout", str(timeout),
            "--ttl", str(ttl),
        ]
        # Give the helper its own timeout plus 30s of slack before we give up.
        completed = subprocess.run(
            argv, capture_output=True, text=True, timeout=timeout + 30,
        )
        tail = ""
        for candidate in (completed.stdout or "").strip().splitlines():
            if candidate:
                tail = candidate
        if completed.returncode == 0 and tail.startswith("OK"):
            return True, tail
        if tail:
            return False, tail
        err_text = (completed.stderr or "").strip()[:200]
        if err_text:
            return False, err_text
        return False, f"rc={completed.returncode}"
    except subprocess.TimeoutExpired:
        return False, "subprocess_timeout"
    except Exception as exc:
        return False, f"exception:{exc}"
1514
+
1515
+
1516
def _release_browser_lease() -> None:
    """Give the reddit-browser lease back. Idempotent (NOT_HELD is fine).

    Meant to run from a `finally` so peers can acquire during the 3-min
    between-post sleep even when post_via_cdp raised. The lease would decay
    by itself after 90s without MCP heartbeats, but releasing explicitly
    frees peers right away. Any failure of the helper is ignored.
    """
    try:
        release_argv = [PYTHON, REDDIT_BROWSER_LOCK, "release"]
        subprocess.run(release_argv, capture_output=True, text=True, timeout=10)
    except Exception:
        # Best-effort: the TTL expiry covers a failed explicit release.
        return
1531
+
1532
+
1533
def post_via_cdp(thread_url, reply_to_url, text):
    """Post a comment or reply via CDP. Returns parsed JSON result.

    Runs the browser helper with the `reply` sub-command when `reply_to_url`
    is given, otherwise `post-comment` against `thread_url`.

    Args:
        thread_url: URL of the thread to comment on.
        reply_to_url: URL of a specific comment to reply to, or a falsy value
            for a top-level comment.
        text: comment body passed to the helper.

    Returns:
        The helper's JSON object when it reports `ok`, or when it reports an
        error that must not be retried; otherwise
        `{"ok": False, "error": "all_attempts_failed"}` after all attempts.
    """
    # 5 attempts with lock-aware backoff. Lock contention (engage.sh or other
    # reddit-agent sessions mid-work) gets longer waits since those sessions
    # have natural gaps every 20-60s between replies. Other errors use a short
    # retry in case of transient network issues.
    MAX_ATTEMPTS = 5
    LOCK_BACKOFF_SEC = (20, 35, 50, 60)
    NO_RETRY_ERRORS = (
        "thread_not_found", "thread_locked", "thread_archived",
        "already_replied", "not_logged_in", "account_blocked_in_sub",
    )
    # The command does not change between attempts, so build it once.
    target = reply_to_url or thread_url
    cmd = [PYTHON, REDDIT_BROWSER, "reply" if reply_to_url else "post-comment", target, text]
    for attempt in range(MAX_ATTEMPTS):
        can_retry = attempt < MAX_ATTEMPTS - 1
        try:
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            cdp_out = proc.stdout.strip()
            if not cdp_out:
                # Full stderr (was [:200] until 2026-05-14; truncation hid the
                # actual exception class/message, leaving cdp_no_response
                # failures undiagnosable in postmortems).
                _stderr_full = proc.stderr or ""
                print(f"[post_reddit] CDP attempt {attempt + 1}: no stdout. stderr:\n{_stderr_full}")
                if can_retry:
                    time.sleep(10)
                continue
            result = json.loads(cdp_out)
            if not isinstance(result, dict):
                # Valid JSON but not an object: treat like a malformed reply
                # instead of crashing on `.get`.
                print(f"[post_reddit] CDP attempt {attempt + 1}: unexpected JSON payload: {cdp_out[:200]}")
                if can_retry:
                    time.sleep(10)
                continue
            if result.get("ok"):
                return result
            # `"error": null` or a non-string error must not break the
            # string checks below.
            err = str(result.get("error") or "unknown")
            print(f"[post_reddit] CDP attempt {attempt + 1}: {err}")
            if err in NO_RETRY_ERRORS:
                return result  # Don't retry these
            # Lock contention: another reddit-agent session is actively working.
            # Back off in increasing intervals to catch a natural gap between
            # their reply drafts. Total wait across 5 attempts: ~2.5 min.
            if "locked by session" in err.lower():
                if can_retry:
                    wait = LOCK_BACKOFF_SEC[attempt]
                    print(f"[post_reddit] CDP waiting {wait}s for browser lock to free...")
                    time.sleep(wait)
                    continue
            # Any other error: short sleep then retry
            if can_retry:
                time.sleep(5)
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError, json.JSONDecodeError) as e:
            print(f"[post_reddit] CDP attempt {attempt + 1} exception: {e}")
            if can_retry:
                time.sleep(10)
    return {"ok": False, "error": "all_attempts_failed"}
1579
+
1580
+
1581
def log_post(thread_url, permalink, text, project_name, thread_author, thread_title, reddit_username, engagement_style=None, search_topic=None, generation_trace_path=None, link_source=None):
    """Log a successful post to the database. Returns the new post_id, or None.

    Invokes the `log-post` sub-command of the reddit tools script and reads
    `post_id` from the JSON it prints on stdout. Every failure is reported
    with a warning and yields None; this function does not raise.

    generation_trace_path (2026-05-12): optional path to a JSON file with
    the few-shot context Claude saw before drafting (top_performers
    report, recent comments, top_search_topics). Forwarded to
    reddit_tools.py as --generation-trace and stored in
    posts.generation_trace JSONB. File-based (not inline) to keep argv
    short. Same trace blob is reused for every post produced from this
    Claude draft, since they all share the same few-shot context.

    link_source (2026-05-17): optional string written to posts.link_source so
    the dashboard can break out audience-page traffic (e.g.
    'audience_page:founder-ghostwriting') from generic homepage links. Set by
    the post loop after URL wrapping based on which curated landing page
    (if any) Claude baked into the reply text.
    """
    try:
        # A None positional makes subprocess.run raise TypeError, which would
        # lose the log row for a post that is already live. Coerce the
        # optional thread metadata to "" the same way permalink is handled.
        cmd = [PYTHON, REDDIT_TOOLS, "log-post",
               thread_url, permalink or "", text, project_name,
               thread_author or "", thread_title or "",
               "--account", reddit_username]
        optional_flags = (
            ("--engagement-style", engagement_style),
            ("--search-topic", search_topic),
            ("--generation-trace", generation_trace_path),
            ("--link-source", link_source),
        )
        for flag, value in optional_flags:
            if value:
                cmd.extend([flag, value])
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
        try:
            payload = json.loads((result.stdout or "").strip())
            return payload.get("post_id")
        except (json.JSONDecodeError, AttributeError, TypeError):
            # Surface the helper's failure instead of dropping it silently.
            stderr_tail = (result.stderr or "").strip()[-500:]
            print(f"[post_reddit] WARNING: log-post returned no post_id "
                  f"(rc={result.returncode}): {stderr_tail}")
            return None
    except Exception as e:
        print(f"[post_reddit] WARNING: log-post failed: {e}")
        return None
1620
+
1621
+
1622
def bump_campaigns(table, row_id, campaign_ids):
    """Link one row of {posts,replies,dm_messages} to each applied campaign.

    Runs scripts/campaign_bump.py once per campaign id. Does nothing when
    `row_id` or `campaign_ids` is falsy. A failure for one campaign is
    reported as a warning and does not stop the remaining ones.
    """
    if not (row_id and campaign_ids):
        return
    script_path = os.path.join(REPO_DIR, "scripts", "campaign_bump.py")
    for campaign_id in campaign_ids:
        try:
            argv = [PYTHON, script_path, "--table", table]
            argv += ["--id", str(row_id), "--campaign-id", str(campaign_id)]
            subprocess.run(argv, capture_output=True, text=True, timeout=15)
        except Exception as exc:
            print(f"[post_reddit] WARNING: campaign_bump failed (id={row_id} c={campaign_id}): {exc}")
1636
+
1637
+
1638
def parse_post_decisions(output):
    """Extract JSON post decisions from Claude's output, deduplicated by thread_url.

    Scans `output` for JSON objects whose top-level ``"action"`` is ``"post"``
    and returns them in order of appearance. An object is kept only when it
    has a truthy ``text`` and a truthy ``thread_url`` that has not been seen
    earlier in the same output.

    Objects are decoded with ``json.JSONDecoder.raw_decode`` starting at each
    ``{`` rather than with a brace-free regex, so a decision whose reply text
    contains ``{``/``}`` (code snippets, templates) or that carries a nested
    object is still recognised instead of being silently dropped.

    Args:
        output: Raw text emitted by the draft session. ``None`` or an empty
            string yields an empty list.

    Returns:
        list[dict]: The accepted decision objects.
    """
    text = output or ""
    decoder = json.JSONDecoder()
    decisions = []
    seen_urls = set()

    pos = text.find("{")
    while pos != -1:
        try:
            candidate, end = decoder.raw_decode(text, pos)
        except ValueError:
            # Not valid JSON from this brace; try the next one.
            pos = text.find("{", pos + 1)
            continue

        if not (isinstance(candidate, dict) and candidate.get("action") == "post"):
            # Valid JSON but not a post decision (e.g. a wrapper object).
            # Step inside it so nested decisions are still discovered.
            pos = text.find("{", pos + 1)
            continue

        # A post decision was decoded: resume scanning after its closing brace.
        pos = text.find("{", end)
        url = candidate.get("thread_url", "")
        try:
            is_new = bool(candidate.get("text")) and bool(url) and url not in seen_urls
        except TypeError:
            # Unhashable thread_url (list/dict): malformed decision, skip it.
            continue
        if is_new:
            decisions.append(candidate)
            seen_urls.add(url)
    return decisions
1652
+
1653
+
1654
def parse_reject_decisions(output):
    """Collect the ``action == "reject"`` JSON objects found in draft output.

    A reject object may include a ``proposed_excludes`` array of typed terms
    (``subreddit:<slug>`` or ``keyword:<word>``); callers forward those to
    project_excludes.propose(). Rejects without that array are still returned
    so they can be audited.

    Matching is done on brace-free ``{...}`` spans that contain the
    ``"action": "reject"`` stamp; because the character class also matches
    newlines, a pretty-printed object spanning several lines is matched too.
    Spans that fail to parse as JSON are dropped without logging.

    Args:
        output: Raw text emitted by the draft session.

    Returns:
        list[dict]: One object per distinct truthy ``thread_url``, first
        occurrence wins, in order of appearance.
    """
    reject_pattern = re.compile(
        r'\{[^{}]*?"action"\s*:\s*"reject"[^{}]*?\}',
        flags=re.DOTALL,
    )
    # Insertion-ordered dict doubles as the "seen" set and the result list.
    by_url = {}
    for hit in reject_pattern.finditer(output):
        try:
            parsed = json.loads(hit.group())
            thread_url = parsed.get("thread_url", "")
            if thread_url and thread_url not in by_url:
                by_url[thread_url] = parsed
        except (json.JSONDecodeError, TypeError):
            # Unparseable span or unhashable url: ignore this reject line.
            continue
    return list(by_url.values())
1685
+
1686
+
1687
+ def _propose_excludes_from_rejects(rejects, project_name, batch_id, candidates_by_url):
1688
+ """Forward Claude-proposed excludes into project_search_excludes (reddit).
1689
+
1690
+ Mirrors the twitter cycle's behavior at run-twitter-cycle.sh:929-966:
1691
+ each proposed term is normalize/validated by project_excludes.propose()
1692
+ against the platform's allowed kinds and the project's reserved-keyword
1693
+ list. The activation gate (>=2 distinct batch_ids) is enforced inside
1694
+ propose(); a single false-rejection in this cycle cannot mute a sub.
1695
+
1696
+ Best-effort: import / DB failures are logged once and the post pipeline
1697
+ continues. The propose() side effect is not on the critical path for
1698
+ posting; if it dies, the only consequence is that we don't accumulate
1699
+ new exclude proposals this cycle.
1700
+
1701
+ Returns a dict with counters for logging.
1702
+ """
1703
+ if not rejects or not project_name:
1704
+ return {"rejects_seen": len(rejects or []), "proposed": 0,
1705
+ "inserted": 0, "bumped": 0, "rejected": 0, "active_now": 0}
1706
+ counters = {"rejects_seen": len(rejects), "proposed": 0,
1707
+ "inserted": 0, "bumped": 0, "rejected": 0, "active_now": 0}
1708
+ try:
1709
+ scripts_dir = os.path.dirname(os.path.abspath(__file__))
1710
+ if scripts_dir not in sys.path:
1711
+ sys.path.insert(0, scripts_dir)
1712
+ import project_excludes as pe
1713
+ except Exception as e:
1714
+ print(f"[post_reddit] WARN: project_excludes import failed: {e}",
1715
+ file=sys.stderr, flush=True)
1716
+ return counters
1717
+
1718
+ for r in rejects:
1719
+ url = r.get("thread_url") or ""
1720
+ terms = r.get("proposed_excludes") or []
1721
+ if not isinstance(terms, list):
1722
+ continue
1723
+ reason = (r.get("reason") or "")[:500]
1724
+ cand = candidates_by_url.get(url) or {}
1725
+ # candidate_id is the reddit_candidates.id for audit purposes; falls
1726
+ # back to None when the candidate object doesn't carry it through.
1727
+ cand_id = cand.get("id") or cand.get("candidate_id")
1728
+ for t in terms[:5]: # cap so a runaway prompt can't spam the table
1729
+ counters["proposed"] += 1
1730
+ try:
1731
+ out = pe.propose(
1732
+ "reddit", project_name, t,
1733
+ candidate_id=cand_id,
1734
+ batch_id=batch_id,
1735
+ reason=reason or None,
1736
+ )
1737
+ except Exception as e:
1738
+ print(f"[post_reddit] WARN: propose failed term={t!r}: {e}",
1739
+ file=sys.stderr, flush=True)
1740
+ counters["rejected"] += 1
1741
+ continue
1742
+ action = out.get("action") or ""
1743
+ if not out.get("ok"):
1744
+ counters["rejected"] += 1
1745
+ elif action == "inserted":
1746
+ counters["inserted"] += 1
1747
+ elif action in ("bumped", "duplicate_batch"):
1748
+ counters["bumped"] += 1
1749
+ if out.get("active"):
1750
+ counters["active_now"] += 1
1751
+ return counters
1752
+
1753
+
1754
+ # Stopwords stripped before computing query<->thread topical overlap. Kept small
1755
+ # and generic: these are the high-frequency English glue words that cause the
1756
+ # Reddit `sort=relevance` leak (a chatty natural-language query like "claude
1757
+ # artifacts built me a little tool to track my habits" matches an unrelated BORU
1758
+ # thread purely on shared words like "me", "to", "my", "a", "little"). We do NOT
1759
+ # strip domain words here — only structural filler — so the surviving overlap is
1760
+ # a real topical signal.
1761
+ _OVERLAP_STOPWORDS = frozenset("""
1762
+ a an the and or but if then else of to in on at for with without from by about into
1763
+ over under again further is are was were be been being am do does did doing have has
1764
+ had having i me my myself we our ours you your yours he him his she her it its they
1765
+ them their this that these those what which who whom whose how when where why all any
1766
+ both each few more most other some such no nor not only own same so than too very can
1767
+ will just dont don't should now get got make made want need like really actually
1768
+ something someone anyone everyone thing things stuff lot lots little bit kind sort
1769
+ """.split())
1770
+
1771
+ # Token must be >=3 chars to count toward overlap (drops "ai" etc.? no — keep 2+
1772
+ # but exclude pure stopwords). We use 2 to keep short domain tokens like "db", "os".
1773
+ _OVERLAP_MIN_LEN = 2
1774
+
1775
+
1776
def _overlap_tokens(text):
    """Return the set of content tokens in `text`.

    Tokens are maximal runs of ``[a-z0-9]`` taken from the lowercased text.
    A token is kept when it is at least ``_OVERLAP_MIN_LEN`` characters long
    and is not listed in ``_OVERLAP_STOPWORDS``. Falsy input yields an empty set.
    """
    if not text:
        return set()
    content = set()
    for token in re.findall(r"[a-z0-9]+", text.lower()):
        if len(token) < _OVERLAP_MIN_LEN or token in _OVERLAP_STOPWORDS:
            continue
        content.add(token)
    return content
1782
+
1783
+
1784
def _topical_overlap(query, title, selftext):
    """Share of the query's distinct content tokens found in the thread text.

    The thread text is `title` and `selftext` joined by a space (each treated
    as empty when falsy). The result lies in [0.0, 1.0]: 0.0 when the query or
    the thread has no content tokens or they share none, 1.0 when every query
    content token occurs in the thread.

    Callers use this as a soft ranking signal only; candidates are never
    hard-dropped on it.
    """
    query_tokens = _overlap_tokens(query)
    if not query_tokens:
        return 0.0
    thread_tokens = _overlap_tokens((title or "") + " " + (selftext or ""))
    if not thread_tokens:
        return 0.0
    shared = query_tokens.intersection(thread_tokens)
    return len(shared) / len(query_tokens)
1800
+
1801
+
1802
+ def _discover_iteration(args, config, reddit_username, already_picked):
1803
+ """DISCOVER phase: search and select threads. No drafting.
1804
+
1805
+ Returns {project_name, decisions: [candidates], cost, session_id} where
1806
+ each candidate has thread_url, title, author, search_topic but NO text
1807
+ field (drafting happens in the draft phase). cost is always 0.0 and
1808
+ session_id None: as of 2026-06-01 discover is fully programmatic (Python
1809
+ builds the query bank and runs reddit_tools.cmd_search directly; no Claude
1810
+ session). Uses `decisions` key for downstream-phase compatibility.
1811
+ """
1812
+ if args.project:
1813
+ project = None
1814
+ for p in config.get("projects", []):
1815
+ if p["name"].lower() == args.project.lower():
1816
+ project = p
1817
+ break
1818
+ if not project:
1819
+ print(f"[post_reddit] ERROR: project '{args.project}' not found")
1820
+ return None
1821
+ else:
1822
+ project = pick_project("reddit", exclude=already_picked)
1823
+ if not project:
1824
+ print(f"[post_reddit] No eligible project left (already picked: {already_picked})")
1825
+ return None
1826
+
1827
+ project_name = project.get("name", "general")
1828
+ print(f"[post_reddit] Project: {project_name}")
1829
+
1830
+ # 2026-05-11: surface the per-project sub denylist for visibility in run
1831
+ # logs (twitter cycle does the equivalent at run-twitter-cycle.sh:410).
1832
+ # The actual *enforcement* happens server-side in reddit_tools._load_
1833
+ # comment_blocked_subs via the S4L_REDDIT_PROJECT env var set below.
1834
+ # mark_used stamps last_used_at on every active term so decay (60d
1835
+ # unused → prune) only fires on terms that truly stopped contributing.
1836
+ try:
1837
+ scripts_dir = os.path.dirname(os.path.abspath(__file__))
1838
+ if scripts_dir not in sys.path:
1839
+ sys.path.insert(0, scripts_dir)
1840
+ import project_excludes as _pe
1841
+ _split = _pe.active_excludes_by_kind("reddit", project_name)
1842
+ _active_subs = _split.get("subreddit") or []
1843
+ _active_kws = _split.get("keyword") or []
1844
+ if _active_subs or _active_kws:
1845
+ _sub_preview = ",".join(_active_subs[:8]) + ("..." if len(_active_subs) > 8 else "")
1846
+ _kw_preview = ",".join(_active_kws[:8]) + ("..." if len(_active_kws) > 8 else "")
1847
+ print(
1848
+ f"[project_excludes] platform=reddit project={project_name} "
1849
+ f"active_subs={len(_active_subs)} active_keywords={len(_active_kws)} "
1850
+ f"subs=[{_sub_preview}] keywords=[{_kw_preview}]"
1851
+ )
1852
+ # Stamp last_used_at so decay doesn't prune still-live terms.
1853
+ # mark_used wants the FULL typed-term form (subreddit:foo).
1854
+ _full_terms = (
1855
+ [f"subreddit:{s}" for s in _active_subs]
1856
+ + [f"keyword:{k}" for k in _active_kws]
1857
+ )
1858
+ try:
1859
+ _pe.mark_used("reddit", project_name, _full_terms)
1860
+ except Exception as e:
1861
+ print(f"[project_excludes] WARN: mark_used failed: {e}", file=sys.stderr)
1862
+ except Exception as e:
1863
+ # Visibility-only path. Never fail discover because of it.
1864
+ print(f"[project_excludes] WARN: active-excludes log failed: {e}", file=sys.stderr)
1865
+
1866
+ # 2026-06-01: discover is now FULLY PROGRAMMATIC (no Claude session).
1867
+ # Previously discover burned an entire Claude session in OPAQUE mode just
1868
+ # to pick query phrasings and fire reddit_tools.py search calls whose
1869
+ # results Claude never even saw (the dump_dir harvest below is what
1870
+ # actually feeds candidates). Query selection + search execution are both
1871
+ # deterministic, so we now build the query bank in Python (mirroring the
1872
+ # Twitter cycle: scan = deterministic Python, Claude only enters at draft)
1873
+ # and run each search via reddit_tools.cmd_search directly. The picker
1874
+ # (engagement style) still fires once at the start of the draft phase —
1875
+ # the only Claude call that actually writes a comment.
1876
+ #
1877
+ # reddit_query_bank pulls proven query phrasings from
1878
+ # /api/v1/search-topics/ranked?platform=reddit (on Reddit the harvested
1879
+ # search_topic IS the raw query string) ranked clicks-first, then appends
1880
+ # config.json seeds for cold-start coverage, deduped by normalized core.
1881
+ import reddit_query_bank as _rqb
1882
+ max_searches = int(os.environ.get("S4L_REDDIT_MAX_SEARCHES", "6") or "6")
1883
+ bank = _rqb.build_bank(project_name, limit=max_searches)
1884
+ queries = [(b.get("query") or "").strip() for b in bank if (b.get("query") or "").strip()]
1885
+ n_proven = sum(1 for b in bank if b.get("source") == "proven")
1886
+ n_seed = len(bank) - n_proven
1887
+ print(f"[discover_bank] project={project_name} queries={len(queries)} "
1888
+ f"proven={n_proven} seed={n_seed} cap={max_searches} :: {queries}")
1889
+
1890
+ if args.dry_run:
1891
+ print(f"=== DRY RUN discover (project={project_name}) ===")
1892
+ for i, q in enumerate(queries, 1):
1893
+ print(f" {i}. {q}")
1894
+ print("=== END DRY RUN ===")
1895
+ return {"project_name": project_name, "decisions": [], "cost": 0.0, "dry_run": True}
1896
+
1897
+ if not queries:
1898
+ print(f"[post_reddit] discover: no queries for project={project_name} "
1899
+ f"(empty bank: no proven queries and no config seeds)")
1900
+ return {"project_name": project_name, "decisions": [], "cost": 0.0,
1901
+ "error": "no_queries"}
1902
+
1903
+ plan_batch_id = f"reddit-discover-{project_name}-{int(time.time())}-{uuid.uuid4().hex[:8]}"
1904
+ os.environ["S4L_REDDIT_PROJECT"] = project_name
1905
+ os.environ["S4L_REDDIT_BATCH_ID"] = plan_batch_id
1906
+
1907
+ # Opaque-results discover (post 2026-05-07 refactor): create a private
1908
+ # dump dir and tell reddit_tools.py via env var to write thread JSON
1909
+ # there instead of stdout. Claude only sees count summaries, never
1910
+ # individual threads, so it cannot pre-filter the way it did in the
1911
+ # 20:16:39 cycle (returned 0 of 39 expected). After Claude exits we
1912
+ # harvest every dumped file directly into the candidate plan.
1913
+ import tempfile as _tempfile
1914
+ import shutil as _shutil
1915
+ import glob as _glob
1916
+ dump_dir = _tempfile.mkdtemp(prefix=f"reddit-discover-{project_name}-")
1917
+ os.environ["S4L_REDDIT_DUMP_DIR"] = dump_dir
1918
+
1919
+ print(f"[post_reddit] Starting programmatic discover "
1920
+ f"(queries={len(queries)}, limit={args.limit}, dump_dir={dump_dir})")
1921
+ import reddit_tools as _rt
1922
+ import types as _types
1923
+ start = time.time()
1924
+ searches_ok = 0
1925
+ try:
1926
+ for q in queries:
1927
+ sargs = _types.SimpleNamespace(
1928
+ query=q,
1929
+ limit=int(args.limit or 25),
1930
+ sort="relevance",
1931
+ time="week",
1932
+ subreddits=None,
1933
+ )
1934
+ try:
1935
+ _rt.cmd_search(sargs) # writes result-*.json into dump_dir
1936
+ searches_ok += 1
1937
+ except SystemExit as se:
1938
+ # cmd_search may sys.exit on a hard rate-limit / stop. Halt the
1939
+ # loop but KEEP whatever already dumped (harvested below).
1940
+ print(f"[post_reddit] discover search halted on {q!r}: "
1941
+ f"SystemExit({getattr(se, 'code', '?')})")
1942
+ break
1943
+ except Exception as e:
1944
+ # One bad query (transient 500, parse error) must not kill the
1945
+ # whole discover. Skip it and continue with the rest of the bank.
1946
+ print(f"[post_reddit] discover search failed for {q!r}: {e}",
1947
+ file=sys.stderr)
1948
+ finally:
1949
+ # Always unset so a subsequent (non-discover) reddit_tools call in
1950
+ # this process doesn't accidentally inherit dump mode.
1951
+ os.environ.pop("S4L_REDDIT_DUMP_DIR", None)
1952
+ elapsed = time.time() - start
1953
+ print(f"[post_reddit] Discover ran {searches_ok}/{len(queries)} searches "
1954
+ f"in {elapsed:.0f}s ($0.0000)")
1955
+
1956
+ # Harvest the dump dir: every cmd_search call that returned threads wrote a
1957
+ # result-*.json. Even if a later query halted the loop, earlier searches'
1958
+ # dumps are still valid candidates.
1959
+ candidates = []
1960
+ seen_urls = set()
1961
+ dump_files = sorted(_glob.glob(os.path.join(dump_dir, "result-*.json")))
1962
+ print(f"[post_reddit] Discover dump dir contains {len(dump_files)} file(s)")
1963
+ for dump_path in dump_files:
1964
+ try:
1965
+ with open(dump_path) as df:
1966
+ payload = json.load(df)
1967
+ except Exception as e:
1968
+ print(f"[post_reddit] WARN: skipping unreadable dump {dump_path}: {e}",
1969
+ file=sys.stderr)
1970
+ continue
1971
+ query = payload.get("query") or ""
1972
+ for t in payload.get("threads") or []:
1973
+ url = t.get("url") or ""
1974
+ if not url or url in seen_urls:
1975
+ continue
1976
+ seen_urls.add(url)
1977
+ candidates.append({
1978
+ "action": "candidate",
1979
+ "thread_url": url,
1980
+ "thread_title": t.get("title") or "",
1981
+ "thread_author": t.get("author") or "",
1982
+ "selftext": t.get("selftext") or "", # captured for analytics + future relevance gates
1983
+ "score": int(t.get("score") or 0),
1984
+ "num_comments": int(t.get("num_comments") or 0),
1985
+ "search_topic": query,
1986
+ })
1987
+ # Best-effort cleanup; the OS will eventually reap /tmp anyway.
1988
+ try:
1989
+ _shutil.rmtree(dump_dir, ignore_errors=True)
1990
+ except Exception:
1991
+ pass
1992
+
1993
+ # Zero successful searches AND nothing harvested = real search-layer
1994
+ # failure (rate-limit / all queries 500'd). Return an error so the runner
1995
+ # counts it failed (rc 5). If searches ran but simply found no fresh
1996
+ # threads, candidates is empty WITHOUT an error → rc 6 (skipped).
1997
+ if searches_ok == 0 and not candidates:
1998
+ print(f"[post_reddit] Discover FAILED: 0/{len(queries)} searches succeeded, "
1999
+ f"no candidates harvested")
2000
+ return {"project_name": project_name, "decisions": [], "cost": 0.0,
2001
+ "error": "no_search_results"}
2002
+
2003
+ print(f"[post_reddit] Discover harvested {len(candidates)} candidate(s) from dump dir")
2004
+ if not candidates:
2005
+ print(f"[post_reddit] No candidates dumped — {searches_ok}/{len(queries)} "
2006
+ f"searches ran but returned no fresh threads")
2007
+
2008
+ # --- Topical-overlap scoring + top-N cap (replaces the old ripen momentum
2009
+ # gate, retired 2026-06-01 to align with the Twitter pipeline which dropped
2010
+ # its inter-phase momentum sleep on 2026-05-31). Reddit's sort=relevance
2011
+ # leaks high-engagement OFF-topic threads that share only structural English
2012
+ # words with a chatty natural-language query (e.g. an on-topic query about a
2013
+ # habit-tracking tool matching an unrelated BORU drama thread). Without the
2014
+ # ripen stage thinning the set over 30 min, we instead sort by a topical-
2015
+ # overlap signal and keep the top N so draft spends its budget on the most
2016
+ # on-topic + active threads. We do NOT hard-drop low-overlap rows: every
2017
+ # harvested candidate is still persisted to the queue for analytics + salvage;
2018
+ # the cap is a soft prioritization only (conservative per user directive —
2019
+ # isolate + surface the garbage in logs rather than silently filtering it).
2020
+ DISCOVER_CAP = int(os.environ.get("S4L_REDDIT_DISCOVER_CAP", "25") or "25")
2021
+ for c in candidates:
2022
+ ov = _topical_overlap(c.get("search_topic"), c.get("thread_title"), c.get("selftext"))
2023
+ # velocity proxy: comments weighted 4x upvotes, echoing the old ripen
2024
+ # composite (Δup + 4·Δcomments) but on absolute counts since we no longer
2025
+ # sample momentum over a time window.
2026
+ vel = int(c.get("score") or 0) + 4 * int(c.get("num_comments") or 0)
2027
+ c["topical_overlap"] = round(ov, 3)
2028
+ c["velocity"] = vel
2029
+ # Primary sort: overlap desc (on-topic first). Tiebreak: velocity desc.
2030
+ ranked = sorted(candidates, key=lambda c: (c["topical_overlap"], c["velocity"]), reverse=True)
2031
+ selected = ranked[:DISCOVER_CAP] if DISCOVER_CAP > 0 else ranked
2032
+
2033
+ # [discover_harvest] marker: surface the overlap distribution so relevance-sort
2034
+ # garbage is visible in logs. overlap_zero = rows sharing NO content token with
2035
+ # the query = almost certainly leak; if these dominate the harvest we know the
2036
+ # query/search is misfiring without having dropped anything.
2037
+ n_zero = sum(1 for c in candidates if c["topical_overlap"] == 0.0)
2038
+ n_low = sum(1 for c in candidates if 0.0 < c["topical_overlap"] < 0.34)
2039
+ n_mid = sum(1 for c in candidates if 0.34 <= c["topical_overlap"] < 0.67)
2040
+ n_high = sum(1 for c in candidates if c["topical_overlap"] >= 0.67)
2041
+ cut = selected[-1]["topical_overlap"] if selected else 0.0
2042
+ print(f"[discover_harvest] project={project_name} harvested={len(candidates)} "
2043
+ f"selected={len(selected)} cap={DISCOVER_CAP} cutoff_overlap={cut:.3f} "
2044
+ f"overlap_zero={n_zero} low={n_low} mid={n_mid} high={n_high}")
2045
+ for c in selected:
2046
+ print(f"[discover_harvest] ov={c['topical_overlap']:.2f} vel={c['velocity']:>5} "
2047
+ f"q={(c.get('search_topic') or '')[:40]!r} :: {(c.get('thread_title') or '')[:70]!r}")
2048
+
2049
+ # Persist freshly-discovered candidates to reddit_candidates so a
2050
+ # transient post failure on a later phase can be retried by the next
2051
+ # cycle's Phase 0 salvage. Best-effort: if the queue write fails, the
2052
+ # tmpfile flow still works for this cycle, we just lose the salvage
2053
+ # benefit. See module-level _db_upsert_discovered_candidate. We persist
2054
+ # ALL harvested candidates (not just the capped `selected`) so the queue
2055
+ # keeps full history per the no-pruning rule.
2056
+ queue_batch = getattr(args, "batch_id", None) or plan_batch_id
2057
+ if not args.dry_run and candidates:
2058
+ for c in candidates:
2059
+ _db_upsert_discovered_candidate(c, queue_batch, project_name)
2060
+
2061
+ # Backfill seed on reddit_search_attempts rows from this batch so the
2062
+ # Search Queries dashboard can join attempts → posts via search_topic.
2063
+ # Use the top-ranked selected candidate's search_topic so the seed reflects
2064
+ # what actually flows into draft.
2065
+ if selected and plan_batch_id:
2066
+ seed = (selected[0].get("search_topic") or "").strip()
2067
+ if seed:
2068
+ try:
2069
+ api_patch(
2070
+ "/api/v1/reddit-search-attempts",
2071
+ {"batch_id": plan_batch_id, "seed": seed},
2072
+ )
2073
+ except Exception as e:
2074
+ print(f"[post_reddit] WARNING: seed backfill failed: {e}", file=sys.stderr)
2075
+
2076
+ return {"project_name": project_name, "decisions": selected,
2077
+ "cost": 0.0, "session_id": None,
2078
+ "phase": "discover"}
2079
+
2080
+
2081
+ def _draft_iteration(plan, config, reddit_username):
2082
+ """DRAFT phase: write comments for ripen-survivors only.
2083
+
2084
+ `plan` is the ripen-filtered discover output. Each decision has thread_url
2085
+ + ripen annotations. Claude fetches each thread and writes the comment.
2086
+ Returns the plan with `text` added to each decision (i.e. ready for _post_iteration).
2087
+
2088
+ Salvage shortcut (2026-05-06): for each candidate we first check if a
2089
+ still-fresh draft exists in reddit_candidates (drafted < DRAFT_TTL_MIN min
2090
+ ago, written by a prior cycle whose post phase failed transiently). If
2091
+ every candidate has a fresh draft, we skip the Claude session entirely
2092
+ and merge the persisted text in. Mirrors twitter_post_plan.py's "EXISTING
2093
+ DRAFT" reuse path; saves $0.20-$0.40 per salvaged candidate.
2094
+ """
2095
+ project_name = plan.get("project_name", "general")
2096
+ candidates = [d for d in (plan.get("decisions") or []) if d.get("thread_url")]
2097
+ if not candidates:
2098
+ return plan
2099
+
2100
+ # Salvage shortcut: check each candidate for a still-fresh persisted draft
2101
+ # before paying the LLM cost. If ALL candidates are covered, skip Claude
2102
+ # and return the merged plan immediately. Order matters here: we must
2103
+ # consult the DB before building the Claude prompt so we don't waste
2104
+ # tokens prepping a session we won't run.
2105
+ fresh_drafts = {}
2106
+ for c in candidates:
2107
+ # An in-memory draft_text from _db_pick_salvage_candidate also counts.
2108
+ if c.get("draft_text"):
2109
+ fresh_drafts[c["thread_url"]] = (
2110
+ c["draft_text"],
2111
+ c.get("engagement_style") or "reused",
2112
+ )
2113
+ continue
2114
+ text, style = _db_load_fresh_draft(c["thread_url"])
2115
+ if text:
2116
+ fresh_drafts[c["thread_url"]] = (text, style or c.get("engagement_style") or "reused")
2117
+
2118
+ if fresh_drafts and len(fresh_drafts) == len(candidates):
2119
+ print(f"[post_reddit] Draft shortcut: all {len(candidates)} candidate(s) "
2120
+ f"have fresh drafts (<{DRAFT_TTL_MIN}m), skipping Claude session.")
2121
+ merged = []
2122
+ for c in candidates:
2123
+ text, style = fresh_drafts[c["thread_url"]]
2124
+ merged_d = dict(c)
2125
+ merged_d["text"] = text
2126
+ merged_d["engagement_style"] = style
2127
+ merged_d["action"] = "post"
2128
+ merged_d.setdefault("reply_to_url", None)
2129
+ merged.append(merged_d)
2130
+ plan = dict(plan)
2131
+ plan["decisions"] = merged
2132
+ plan["draft_cost"] = 0.0
2133
+ plan["phase"] = "draft"
2134
+ plan["draft_reused"] = True
2135
+ # Build a "reused draft" marker trace so the audit row isn't empty.
2136
+ # We can't recover the exact context the prior cycle's Claude saw,
2137
+ # but the current top_performers/recent_comments document what the
2138
+ # few-shot prompt WOULD have contained had we redrafted. The
2139
+ # reused_from_prior_cycle flag tells future auditors "this is
2140
+ # current-cycle context, not what produced the draft" — without it
2141
+ # the trace would look like Claude saw this report and chose to
2142
+ # reuse, which it didn't (Claude wasn't invoked at all). Marker
2143
+ # also gives 100% trace coverage on the platform so SQL queries
2144
+ # don't have to special-case NULL rows.
2145
+ try:
2146
+ from generation_trace import build_trace, write_trace_tempfile
2147
+ top_report = get_top_performers(project_name)
2148
+ recent_comments = get_recent_comments()
2149
+ trace = build_trace(
2150
+ platform="reddit",
2151
+ project_name=project_name,
2152
+ prompt_chars=0, # no Claude call this cycle
2153
+ top_performers_text=top_report or "",
2154
+ top_search_topics_text="",
2155
+ recent_comment_ids=[pid for pid, _ in (recent_comments or [])],
2156
+ model="reused_from_prior_cycle",
2157
+ min_score_floor=10,
2158
+ extras={
2159
+ "reused_from_prior_cycle": True,
2160
+ "draft_ttl_min": DRAFT_TTL_MIN,
2161
+ "reused_candidate_count": len(candidates),
2162
+ },
2163
+ )
2164
+ trace_path = write_trace_tempfile(trace, prefix="reddit_reused_trace_")
2165
+ if trace_path:
2166
+ plan["generation_trace_path"] = trace_path
2167
+ print(f"[post_reddit] Reused-draft trace marker: {trace_path}")
2168
+ except Exception as e:
2169
+ print(f"[post_reddit] WARNING: reused-draft trace build failed "
2170
+ f"({e}); proceeding without trace")
2171
+ return plan
2172
+
2173
+ project = None
2174
+ config_projects = config.get("projects", [])
2175
+ for p in config_projects:
2176
+ if p["name"].lower() == project_name.lower():
2177
+ project = p
2178
+ break
2179
+ if not project:
2180
+ print(f"[post_reddit] WARNING: project '{project_name}' not found in config, drafting with generic context")
2181
+ project = {"name": project_name}
2182
+
2183
+ # 2026-05-19: pick the engagement style HERE — draft is the only
2184
+ # Claude call in the Reddit cycle that actually writes a comment, so
2185
+ # this is where the picker belongs. (Discover is scan-only opaque
2186
+ # mode; it never sees thread content and never drafts text, so a
2187
+ # picker there would just be useless decoration.)
2188
+ # Mirrors the Twitter engage cycle: pick once → filter top_performers
2189
+ # to the assigned style → embed the assignment block in the prompt →
2190
+ # JSON example shows the literal assigned style name. End-to-end
2191
+ # adherence comes from those three lined-up signals.
2192
+ style_assignment = pick_style_for_post("reddit", context="posting")
2193
+ picked_style = style_assignment.get("style")
2194
+ print(f"[post_reddit] draft style assigned: mode={style_assignment['mode']} "
2195
+ f"style={picked_style or '(invent)'}")
2196
+ top_report = get_top_performers(project_name, style=picked_style)
2197
+ recent_comments = get_recent_comments()
2198
+ # We don't have a Reddit equivalent of top_search_topics_report in
2199
+ # the draft phase (the discover phase loads it for the search step).
2200
+ # Pass empty string; the trace audit still captures top_performers
2201
+ # and recent_comments, which is the bulk of the few-shot context.
2202
+ prompt = build_draft_prompt(project, config, candidates, top_report, recent_comments,
2203
+ style_assignment=style_assignment)
2204
+
2205
+ # Build the generation_trace audit blob: what Claude is about to see.
2206
+ # Captured BEFORE the Claude call so we never end up with a post row
2207
+ # missing its trace if Claude errors out. The path is stashed in
2208
+ # `plan` so the post-phase (_post_iteration → log_post) can forward
2209
+ # it to reddit_tools.py for INSERT into posts.generation_trace.
2210
+ # Same trace reused for every post produced from this draft session.
2211
+ try:
2212
+ from generation_trace import build_trace, write_trace_tempfile
2213
+ trace = build_trace(
2214
+ platform="reddit",
2215
+ project_name=project_name,
2216
+ prompt_chars=len(prompt or ""),
2217
+ top_performers_text=top_report or "",
2218
+ top_search_topics_text="", # Reddit draft phase doesn't surface this
2219
+ recent_comment_ids=[pid for pid, _ in (recent_comments or [])],
2220
+ model=None,
2221
+ min_score_floor=10, # PLATFORM_MIN_SCORE['reddit']
2222
+ )
2223
+ trace_path = write_trace_tempfile(trace, prefix="reddit_gen_trace_")
2224
+ if trace_path:
2225
+ plan["generation_trace_path"] = trace_path
2226
+ print(f"[post_reddit] Generation trace: {trace_path} "
2227
+ f"({os.path.getsize(trace_path)} bytes)")
2228
+ except Exception as e:
2229
+ # Audit row is nice-to-have, never a blocker.
2230
+ print(f"[post_reddit] WARNING: generation_trace build failed "
2231
+ f"({e}); proceeding without trace")
2232
+
2233
+ print(f"[post_reddit] Starting draft session for {len(candidates)} thread(s)...")
2234
+ start = time.time()
2235
+ ok, output, usage = run_claude(prompt, timeout=600)
2236
+ elapsed = time.time() - start
2237
+ print(f"[post_reddit] Draft finished in {elapsed:.0f}s (${usage['cost_usd']:.4f})")
2238
+
2239
+ if not ok:
2240
+ print(f"[post_reddit] Draft FAILED: {output[:300]}")
2241
+ plan["draft_error"] = "claude_failed"
2242
+ plan["draft_cost"] = usage["cost_usd"]
2243
+ return plan
2244
+
2245
+ drafted = parse_post_decisions(output)
2246
+ print(f"[post_reddit] Draft produced {len(drafted)} post(s)")
2247
+
2248
+ # 2026-05-11: parse optional action=reject lines and forward any
2249
+ # `proposed_excludes` arrays into project_search_excludes via the
2250
+ # activation gate (>=2 distinct batches required before a term goes
2251
+ # live). Self-improving denylist mirroring twitter's behavior. Errors
2252
+ # here MUST NOT kill the draft phase; the post pipeline is the critical
2253
+ # path. See parse_reject_decisions / _propose_excludes_from_rejects.
2254
+ try:
2255
+ rejects = parse_reject_decisions(output)
2256
+ if rejects:
2257
+ cand_by_url = {c.get("thread_url"): c for c in candidates if c.get("thread_url")}
2258
+ counters = _propose_excludes_from_rejects(
2259
+ rejects, project_name, plan.get("batch_id"), cand_by_url,
2260
+ )
2261
+ if counters["proposed"]:
2262
+ print(
2263
+ f"[post_reddit] reject lines={counters['rejects_seen']} "
2264
+ f"proposed={counters['proposed']} inserted={counters['inserted']} "
2265
+ f"bumped={counters['bumped']} rejected={counters['rejected']} "
2266
+ f"active_now={counters['active_now']}"
2267
+ )
2268
+ except Exception as e:
2269
+ print(f"[post_reddit] WARN: reject-line processing failed: {e}", file=sys.stderr)
2270
+
2271
+ # Merge text back into the original candidates by thread_url so we
2272
+ # preserve ripen annotations, search_topic, etc. from discover phase.
2273
+ # Each freshly-written draft is also persisted to reddit_candidates so a
2274
+ # later salvage iteration can reuse it without paying the LLM cost again.
2275
+ by_url = {d["thread_url"]: d for d in drafted}
2276
+ merged = []
2277
+ for c in candidates:
2278
+ url = c.get("thread_url", "")
2279
+ drafted_d = by_url.get(url)
2280
+ if drafted_d and drafted_d.get("text"):
2281
+ merged_d = dict(c)
2282
+ merged_d["text"] = drafted_d["text"]
2283
+ merged_d["reply_to_url"] = drafted_d.get("reply_to_url")
2284
+ merged_d["thread_author"] = drafted_d.get("thread_author") or c.get("thread_author")
2285
+ merged_d["thread_title"] = drafted_d.get("thread_title") or c.get("thread_title")
2286
+ merged_d["engagement_style"] = drafted_d.get("engagement_style") or c.get("engagement_style")
2287
+ merged_d["action"] = "post"
2288
+ merged.append(merged_d)
2289
+ _db_save_draft(url, merged_d["text"], merged_d.get("engagement_style"))
2290
+ else:
2291
+ # Claude OMITted this thread (build_draft_prompt's SELECTION GATE
2292
+ # decided no plausible bridge between the thread's audience and
2293
+ # the project — token-overlap false positive, off-topic sub, etc.).
2294
+ # Mark status='failed' with reason='draft_gate_omit' so Phase 0
2295
+ # salvage on the next cycle stops re-pulling it. Without this the
2296
+ # same dead thread would keep clearing ripen (engagement is real)
2297
+ # and burning ~$0.05/cycle on a fetch + gate decision that always
2298
+ # lands the same way. Mirrors the one-strike rule at ripen time,
2299
+ # applied at draft time for active-but-unfit threads.
2300
+ print(f"[post_reddit] Draft gate OMIT for {url}: marking status=failed")
2301
+ _db_mark_candidate_attempt(url, reason="draft_gate_omit", permanent=True)
2302
+
2303
+ plan = dict(plan)
2304
+ plan["decisions"] = merged
2305
+ plan["draft_cost"] = usage["cost_usd"]
2306
+ plan["draft_session_id"] = usage.get("session_id")
2307
+ plan["phase"] = "draft"
2308
+ # Stash the picker assignment so _post_iteration (which runs in a
2309
+ # separate process via JSON-serialized plan) can pass it to
2310
+ # validate_or_register for USE-mode drift coercion + INVENT-mode gating.
2311
+ plan["style_assignment"] = style_assignment
2312
+ return plan
2313
+
2314
+
2315
+ def _post_iteration(plan, reddit_username):
2316
+ """Execute browser CDP posts for the decisions in plan. Returns (posted, failed)."""
2317
+ project_name = plan["project_name"]
2318
+ decisions = plan.get("decisions") or []
2319
+ # Picker assignment was stamped by _draft_iteration; survives JSON
2320
+ # serialization across the draft→post process boundary. Used below
2321
+ # in validate_or_register for USE-mode drift coercion + INVENT-mode
2322
+ # gating. Fallback to {} for plans drafted before this field landed.
2323
+ style_assignment = plan.get("style_assignment") or {}
2324
+
2325
+ if not decisions:
2326
+ return 0, 0
2327
+
2328
+ # 2026-05-08: post-phase cap REMOVED per user instruction. Three serial
2329
+ # gates already filter the candidate pool (search-time blocks,
2330
+ # ripen composite floor, softened LLM relevance gate). Anything that
2331
+ # survives all three has earned its post; an arbitrary 10/cycle cap was
2332
+ # just throwing away qualified work. If Reddit rate-limits start firing
2333
+ # under runaway-cycle conditions, revisit by adding a per-minute throttle
2334
+ # to _post_iteration's loop body, NOT a hard count cap.
2335
+
2336
+ # In two-phase mode (plan in process A, post in process B), the env var
2337
+ # set by run_claude in process A is gone. Re-export here so log_post →
2338
+ # reddit_tools.py log-post stamps posts.claude_session_id correctly and
2339
+ # the dashboard activity feed can join to claude_sessions for cost.
2340
+ plan_session_id = plan.get("session_id")
2341
+ if plan_session_id:
2342
+ os.environ["CLAUDE_SESSION_ID"] = plan_session_id
2343
+
2344
+ active_campaigns = load_active_reddit_campaigns()
2345
+ if active_campaigns:
2346
+ for c in active_campaigns:
2347
+ print(f"[post_reddit] active campaign id={c['id']} "
2348
+ f"sample_rate={c['sample_rate']:.3f} suffix={c['suffix']!r}")
2349
+
2350
+ posted = 0
2351
+ failed = 0
2352
+
2353
+ for i, decision in enumerate(decisions):
2354
+ thread_url = decision["thread_url"]
2355
+ reply_to_url = decision.get("reply_to_url")
2356
+ text = decision["text"]
2357
+ thread_author = decision.get("thread_author", "unknown")
2358
+ thread_title = decision.get("thread_title", "unknown")
2359
+ # validate_or_register: in USE mode, coerces any drifted style name
2360
+ # back to the assigned one (so picker authority is preserved even if
2361
+ # the drafter ignores the assignment). In INVENT mode (5% slot),
2362
+ # registers the new style into engagement_styles_registry via
2363
+ # /api/v1/engagement-styles/registry. assigned_style/assigned_mode
2364
+ # come from pick_style_for_post() above; without them the picker's
2365
+ # choice would be silently overridable by the model.
2366
+ engagement_style, _style_action = validate_or_register(
2367
+ decision,
2368
+ source_post={
2369
+ "platform": "reddit",
2370
+ "post_url": thread_url,
2371
+ "post_id": None,
2372
+ "model": decision.get("model"),
2373
+ },
2374
+ assigned_style=(style_assignment or {}).get("style"),
2375
+ assigned_mode=(style_assignment or {}).get("mode"),
2376
+ )
2377
+ search_topic = decision.get("search_topic") or None
2378
+
2379
+ applied_campaign_ids = []
2380
+ for camp in active_campaigns:
2381
+ if random.random() < camp["sample_rate"]:
2382
+ text = text + camp["suffix"]
2383
+ applied_campaign_ids.append(camp["id"])
2384
+ if applied_campaign_ids:
2385
+ print(f"[post_reddit] applied campaigns {applied_campaign_ids} (suffix appended)")
2386
+
2387
+ # Audience-page detection (2026-05-17). Inspect the unwrapped text for
2388
+ # any URL that exactly matches a curated audience-page (e.g.
2389
+ # https://s4l.ai/ghostwriting). When found, posts.link_source is
2390
+ # stamped 'audience_page:<angle>' for the row so the dashboard can
2391
+ # break out curated traffic from generic homepage links. Detection
2392
+ # runs BEFORE wrap_text_for_post because wrapping rewrites the URLs
2393
+ # to /r/<code> short links; classify_url_as_audience_page() needs
2394
+ # the original target URL.
2395
+ audience_page_link_source = None
2396
+ try:
2397
+ for _url_m in re.finditer(r'https?://[^\s)\]>"\']+', text):
2398
+ _raw = _url_m.group(0).rstrip('.,);!?]')
2399
+ _angle = _audience_classify_url(_raw, project_name)
2400
+ if _angle:
2401
+ audience_page_link_source = f"audience_page:{_angle}"
2402
+ break
2403
+ except Exception as _e:
2404
+ print(f"[post_reddit] WARNING: audience-page classify raised ({_e})")
2405
+
2406
+ # URL-wrap the final text (URLs in suffix included). Mints into
2407
+ # post_links with NULL post_id; we backfill after log_post returns
2408
+ # below. On wrap failure, post unwrapped — losing attribution is
2409
+ # better than failing a post that already passed planning.
2410
+ minted_session = None
2411
+ try:
2412
+ from dm_short_links import wrap_text_for_post, utm_only_text
2413
+ wrap_res = wrap_text_for_post(text=text, platform="reddit",
2414
+ project_name=project_name)
2415
+ if wrap_res.get("ok"):
2416
+ text = wrap_res["text"]
2417
+ minted_session = wrap_res.get("minted_session")
2418
+ if wrap_res.get("codes"):
2419
+ print(f"[post_reddit] wrapped {len(wrap_res['codes'])} URL(s): "
2420
+ f"{wrap_res['codes']}")
2421
+ else:
2422
+ print(f"[post_reddit] WARNING: URL wrap failed "
2423
+ f"({wrap_res.get('error')}); falling back to UTM-only")
2424
+ text = utm_only_text(text=text, platform="reddit", project_name=project_name)
2425
+ except Exception as e:
2426
+ print(f"[post_reddit] WARNING: URL wrap raised ({e}); falling back to UTM-only")
2427
+ try:
2428
+ from dm_short_links import utm_only_text
2429
+ text = utm_only_text(text=text, platform="reddit", project_name=project_name)
2430
+ except Exception as ee:
2431
+ print(f"[post_reddit] WARNING: UTM-only fallback also failed ({ee}); posting unwrapped")
2432
+
2433
+ # Per-row reddit-browser lease (2026-05-13). Acquire JUST around the
2434
+ # CDP work, release before this row's DB post-processing and the 3-min
2435
+ # between-post sleep. Peers (link-edit, dm-outreach, engage,
2436
+ # engage-dm-replies) can use the browser during our sleeps and DB
2437
+ # writes instead of sitting blocked until the whole batch finishes.
2438
+ lease_ok, lease_msg = _acquire_browser_lease(timeout=600, ttl=90)
2439
+ if not lease_ok:
2440
+ print(f"[post_reddit] {i + 1}/{len(decisions)} LEASE: {lease_msg}; skipping post")
2441
+ failed += 1
2442
+ # Treat lease-acquire failure as TRANSIENT so phase0 salvages
2443
+ # the row next cycle (it's not the candidate's fault that a
2444
+ # peer pipeline held the browser too long).
2445
+ _db_mark_candidate_attempt(thread_url, "lease_acquire_timeout", permanent=False)
2446
+ if i < len(decisions) - 1:
2447
+ time.sleep(180)
2448
+ continue
2449
+
2450
+ try:
2451
+ print(f"[post_reddit] Posting {i + 1}/{len(decisions)}: {thread_title[:50]}...")
2452
+ result = post_via_cdp(thread_url, reply_to_url, text)
2453
+ finally:
2454
+ _release_browser_lease()
2455
+
2456
+ if result.get("ok"):
2457
+ if result.get("already_replied"):
2458
+ print(f"[post_reddit] DEDUP: already posted in this thread")
2459
+ # Treat dedup as a successful queue resolution: the row should
2460
+ # come out of 'pending' so Phase 0 stops salvaging it.
2461
+ _db_mark_candidate_posted(thread_url, None)
2462
+ continue
2463
+ permalink = result.get("permalink", "")
2464
+ if not permalink or not permalink.startswith("http"):
2465
+ print(f"[post_reddit] SKIPPED LOG: no valid permalink captured (got: {permalink!r})")
2466
+ failed += 1
2467
+ # No-permalink is permanent: the post may have actually
2468
+ # landed but we can't verify it; retrying would dupe.
2469
+ _db_mark_candidate_attempt(thread_url, "no_permalink", permanent=True)
2470
+ continue
2471
+ new_post_id = log_post(thread_url, permalink, text, project_name,
2472
+ thread_author, thread_title, reddit_username,
2473
+ engagement_style=engagement_style,
2474
+ search_topic=search_topic,
2475
+ # Forward the trace blob built during draft phase.
2476
+ # Same trace for every post in this plan because they
2477
+ # all saw the same few-shot context. None when the
2478
+ # draft phase used a reused/cached draft (no Claude
2479
+ # call) — that's fine, audit just records no trace.
2480
+ generation_trace_path=plan.get("generation_trace_path"),
2481
+ link_source=audience_page_link_source)
2482
+ bump_campaigns("posts", new_post_id, applied_campaign_ids)
2483
+ # Backfill post_links.post_id for the codes minted at wrap time
2484
+ # so /api/short-links/<code> resolver knows which post each
2485
+ # click attributes to. Idempotent; no-op when minted_session is
2486
+ # None (post had no URLs).
2487
+ if minted_session and new_post_id:
2488
+ try:
2489
+ from dm_short_links import backfill_post_id
2490
+ backfill_post_id(minted_session=minted_session,
2491
+ post_id=new_post_id)
2492
+ except Exception as e:
2493
+ print(f"[post_reddit] WARNING: backfill_post_id failed ({e})")
2494
+ posted += 1
2495
+ print(f"[post_reddit] POSTED: {permalink}")
2496
+ _db_mark_candidate_posted(thread_url, new_post_id)
2497
+ else:
2498
+ err = result.get("error", "unknown")
2499
+ failed += 1
2500
+ print(f"[post_reddit] CDP FAILED: {err}")
2501
+ if err == "account_blocked_in_sub":
2502
+ # project=None: account-level ban applies across ALL projects,
2503
+ # not just the one currently posting. Backfill of 28 existing
2504
+ # project-scoped entries applied 2026-05-19.
2505
+ mark_comment_blocked(thread_url, reason=err, project=None)
2506
+ # Classify the CDP error for queue retry. Unknown errors default
2507
+ # to TRANSIENT so we don't permanently kill candidates on a new
2508
+ # error string we haven't classified yet; the MAX_ATTEMPTS cap
2509
+ # auto-promotes them to 'failed' after 3 retries anyway.
2510
+ permanent = err in _PERMANENT_CDP_ERRORS
2511
+ _db_mark_candidate_attempt(thread_url, err, permanent=permanent)
2512
+
2513
+ if i < len(decisions) - 1:
2514
+ time.sleep(180) # 3 min gap between posts within a single Claude session
2515
+
2516
+ return posted, failed
2517
+
2518
+
2519
def main():
    """Command-line entry point for the Reddit posting orchestrator.

    Parses the CLI, loads config, then runs exactly one ``--phase``:

    * ``phase0``   - expire/re-assign pending rows, prints ``expired=N salvaged=M``.
    * ``salvage``  - write salvaged candidates as a discover-shape plan to ``--out``.
    * ``discover`` - run discovery and write the plan to ``--out``.
    * ``draft``    - draft comments for the plan in ``--in``, write to ``--out``.
    * ``post``     - post the plan in ``--in`` via CDP.

    Exit codes used below: 2 = missing/invalid arguments, 3 = rate-limited
    (discover) or Playwright not importable (post), 4 = discover produced no
    plan, 5 = plan carries an error, 6 = nothing to process.
    """
    cli = argparse.ArgumentParser(description="Reddit posting orchestrator")
    cli.add_argument("--dry-run", action="store_true", help="Print prompt without executing")
    cli.add_argument("--limit", type=int, default=3, help="Max comments per Claude session (default: 3)")
    cli.add_argument("--timeout", type=int, default=3600, help="Timeout for Claude session")
    cli.add_argument("--project", default=None, help="Override project selection")
    cli.add_argument(
        "--phase",
        choices=["discover", "draft", "post", "phase0", "salvage"],
        required=True,
        help="discover: search+select threads only (no drafting), writes JSON to --out. "
             "draft: write comments for ripen-survivors from --in, writes JSON to --out. "
             "post: read JSON from --in and post via CDP. "
             "phase0: hard-expire stale pending rows + re-assign salvageable rows "
             "to --batch-id. Prints `expired=N salvaged=M` for the orchestrator. "
             "salvage: pull ONE salvage-eligible row (already re-assigned to "
             "--batch-id by phase0) and write it as a discover-shape JSON to --out. "
             "Exits 0 with a candidate, 6 if nothing salvageable.",
    )
    cli.add_argument(
        "--out", default=None,
        help="Output JSON path (--phase discover, --phase draft, --phase salvage)",
    )
    cli.add_argument(
        "--in", dest="in_path", default=None,
        help="Input JSON path (--phase draft, --phase post)",
    )
    cli.add_argument("--exclude", default="", help="Comma-separated project names to exclude")
    cli.add_argument(
        "--batch-id", dest="batch_id", default=None,
        help="Cycle-level batch_id (e.g. rdcycle-YYYYMMDD-HHMMSS). Used by "
             "--phase phase0 / --phase salvage / --phase discover to attribute "
             "rows in reddit_candidates and reddit_batches. Required for "
             "phase0 and salvage; optional for discover (defaults to a "
             "per-discover synthetic id).",
    )
    args = cli.parse_args()

    config = load_config()
    reddit_cfg = config.get("accounts", {}).get("reddit", {})
    reddit_username = reddit_cfg.get("username", "Deep_Ad1959")

    def usage_error(message):
        # Report a bad invocation on stderr and stop with exit code 2.
        print(message, file=sys.stderr)
        sys.exit(2)

    def read_plan(path):
        # Load a plan JSON written by an earlier phase.
        with open(path) as fh:
            return json.load(fh)

    def write_plan(payload, path):
        # Persist a plan JSON for the next phase to pick up.
        with open(path, "w") as fh:
            json.dump(payload, fh)

    phase = args.phase

    if phase == "phase0":
        # Hard-expire stale pending rows + re-assign salvageable rows to the
        # current cycle's batch_id. Single advisory-lock'd transaction so two
        # concurrent cycles can't double-salvage the same row. Output is the
        # one line `expired=N salvaged=M` parsed by run-reddit-search.sh.
        if not args.batch_id:
            usage_error("[post_reddit] ERROR: --phase phase0 requires --batch-id")
        expired, salvaged = _db_phase0_salvage(args.batch_id)
        print(f"expired={expired} salvaged={salvaged}")

    elif phase == "salvage":
        # Pull up to --limit salvage-eligible rows (already re-assigned to
        # args.batch_id by phase0) from a SINGLE project and write a
        # discover-shape JSON to --out. The shell can then feed that file
        # to ripen → draft → post like a normal candidate batch.
        if not args.out:
            usage_error("[post_reddit] ERROR: --phase salvage requires --out PATH")
        if not args.batch_id:
            usage_error("[post_reddit] ERROR: --phase salvage requires --batch-id")
        salvage_limit = max(1, int(args.limit or 1))
        plan = _db_pick_salvage_candidates(args.batch_id, limit=salvage_limit)
        if not plan:
            print("[post_reddit] salvage: no eligible pending rows for this cycle")
            sys.exit(6)
        write_plan(plan, args.out)
        urls = [entry["thread_url"] for entry in plan["decisions"]]
        print(f"[post_reddit] SALVAGED {plan['salvaged_count']} candidate(s) "
              f"(max attempt={plan['salvaged_attempt']}/{MAX_ATTEMPTS}) "
              f"project={plan['project_name']} urls={urls}")

    elif phase == "discover":
        if not args.out:
            usage_error("[post_reddit] ERROR: --phase discover requires --out PATH")
        if not preflight_rate_limit():
            print("[post_reddit] rate-limited, discover skipped")
            sys.exit(3)
        excluded = []
        for raw_name in args.exclude.split(","):
            name = raw_name.strip()
            if name:
                excluded.append(name)
        plan = _discover_iteration(args, config, reddit_username, excluded)
        if plan is None:
            sys.exit(4)
        write_plan(plan, args.out)
        # The plan is written first so the caller can inspect it whatever the
        # exit code turns out to be.
        if plan.get("dry_run"):
            sys.exit(0)
        if plan.get("error"):
            sys.exit(5)
        if not plan.get("decisions"):
            sys.exit(6)

    elif phase == "draft":
        if not (args.in_path and os.path.exists(args.in_path)):
            usage_error(f"[post_reddit] ERROR: --phase draft requires --in PATH (got {args.in_path!r})")
        if not args.out:
            usage_error("[post_reddit] ERROR: --phase draft requires --out PATH")
        plan = read_plan(args.in_path)
        if not plan.get("decisions"):
            print("[post_reddit] draft: no survivors in plan, nothing to draft")
            sys.exit(6)
        plan = _draft_iteration(plan, config, reddit_username)
        write_plan(plan, args.out)
        if plan.get("draft_error"):
            sys.exit(5)
        if not plan.get("decisions"):
            sys.exit(6)

    elif phase == "post":
        if not (args.in_path and os.path.exists(args.in_path)):
            usage_error(f"[post_reddit] ERROR: --phase post requires --in PATH (got {args.in_path!r})")
        plan = read_plan(args.in_path)
        # Hard preflight: _post_iteration shells to reddit_browser.py, the only
        # Playwright importer on this rail. If the resolved interpreter can't
        # import it the owned runtime is missing/half-provisioned and every post
        # would die with CDP_ERROR. Fail LOUD with a distinct signal instead.
        # Gated on real decisions so an empty plan still exits clean.
        if plan.get("decisions"):
            probe = subprocess.run(
                [PYTHON, "-c", "import playwright"],
                capture_output=True, text=True,
            )
            if probe.returncode != 0:
                print(f"[post_reddit] FATAL runtime_incomplete: interpreter {PYTHON!r} "
                      f"cannot import playwright — the owned Python runtime is missing or "
                      f"unprovisioned. Run the `runtime` install (action:'install') before "
                      f"posting. stderr: {(probe.stderr or '').strip()[:300]}", file=sys.stderr)
                sys.exit(3)
        try:
            posted, failed = _post_iteration(plan, reddit_username)
            print(f"[post_reddit] phase=post project={plan.get('project_name')} posted={posted} failed={failed}")
        finally:
            # Clean up the generation_trace temp file. By this point every
            # post that landed has the trace JSONB persisted to its row,
            # so the on-disk file is redundant. Best-effort delete.
            try:
                from generation_trace import cleanup_trace_tempfile
                cleanup_trace_tempfile(plan.get("generation_trace_path"))
            except Exception:
                pass
2665
+
2666
+
2667
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()