@m13v/s4l 1.6.197-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1336 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +513 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,956 @@
1
+ #!/usr/bin/env python3
2
+ """Reddit CLI tools for Claude to call via Bash.
3
+
4
+ Commands:
5
+ python3 scripts/reddit_tools.py search "security cameras" [--limit 10] [--sort relevance] [--time week]
6
+ python3 scripts/reddit_tools.py search "automation" --subreddits AI_Agents,SaaS,smallbusiness --time month
7
+ python3 scripts/reddit_tools.py fetch <thread_url>
8
+ python3 scripts/reddit_tools.py log-post <thread_url> <our_permalink> <our_text> <project> <thread_author> <thread_title>
9
+ python3 scripts/reddit_tools.py already-posted <thread_url>
10
+ """
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import sys
16
+ import time
17
+ import urllib.request
18
+ import urllib.parse
19
+ from datetime import datetime, timezone
20
+
21
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
22
+ from http_api import api_get, api_post
23
+ from version import read_version as read_autoposter_version
24
+ try:
25
+ from account_resolver import resolve as _resolve_account
26
+ except Exception:
27
+ def _resolve_account(_platform): # type: ignore[unused-arg]
28
+ return None
29
+
30
+ USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
31
+
32
+ # Persistent rate limit file to share state across invocations
33
+ RATELIMIT_FILE = "/tmp/reddit_ratelimit.json"
34
+
35
+
36
def _read_ratelimit():
    """Load the rate-limit state persisted by earlier invocations.

    Returns:
        dict with numeric ``remaining`` and ``reset_at`` (epoch seconds)
        entries. When the state file is missing, unreadable, not valid JSON,
        or valid JSON of the wrong shape, the permissive default
        ``{"remaining": 100, "reset_at": 0}`` is returned instead.
    """
    default = {"remaining": 100, "reset_at": 0}
    try:
        with open(RATELIMIT_FILE) as f:
            state = json.load(f)
    except Exception:
        return default
    # The file is shared across invocations, so it can hold well-formed JSON
    # that is not our schema. Callers index both keys and compare them with
    # numbers, so anything else must degrade to the default rather than
    # raising KeyError/TypeError later.
    if not isinstance(state, dict):
        return default
    for key in ("remaining", "reset_at"):
        value = state.get(key)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return default
    return state
42
+
43
+
44
def _write_ratelimit(remaining, reset_seconds):
    """Persist the latest rate-limit reading for later invocations.

    Args:
        remaining: request budget left, as reported by Reddit.
        reset_seconds: seconds from now until the budget resets; it is
            stored as an absolute epoch timestamp under ``reset_at``.
    """
    state = {"remaining": remaining, "reset_at": time.time() + reset_seconds}
    with open(RATELIMIT_FILE, "w") as handle:
        json.dump(state, handle)
48
+
49
+
50
class RateLimitedError(Exception):
    """Signals that Reddit's rate limit requires a wait too long to absorb inline.

    Attributes:
        reset_seconds: the wait, in seconds, as passed by the raiser. The
            exception message embeds it truncated to an integer.
    """

    def __init__(self, reset_seconds):
        message = "rate_limited_wait_{}s".format(int(reset_seconds))
        super().__init__(message)
        self.reset_seconds = reset_seconds
55
+
56
+
57
+ # Maximum time a single tool invocation is allowed to wait for rate limit to clear.
58
+ # Longer waits are returned as errors so Claude can skip and try something else.
59
+ # 90s stays under Claude's default 120s bash timeout while absorbing the common
60
+ # short-reset case (resets are usually 10-60s after a single burst).
61
+ MAX_INLINE_WAIT_SECONDS = 90
62
+
63
+
64
def _wait_if_needed():
    """Pause before a request when the persisted budget is nearly exhausted.

    Does nothing unless the stored ``remaining`` is at most 2 and the stored
    reset time is still in the future. Otherwise sleeps until the reset plus
    a 2 second margin.

    Raises:
        RateLimitedError: if that sleep would exceed MAX_INLINE_WAIT_SECONDS.
    """
    state = _read_ratelimit()
    near_zero = state["remaining"] <= 2
    if not (near_zero and state["reset_at"] > time.time()):
        return
    delay = int(state["reset_at"] - time.time()) + 2
    if delay > MAX_INLINE_WAIT_SECONDS:
        # Too long to block a single tool call; let the caller move on.
        raise RateLimitedError(delay)
    print(f"Rate limit near zero, waiting {delay}s...", file=sys.stderr)
    time.sleep(delay)
72
+
73
+
74
def _fetch_via_browser(url):
    """Try to fetch ``url`` through the reddit-harness browser.

    Returns the response body when the browser reports status 200 with a
    non-empty body. Returns None in every other case so the caller can fall
    back to urllib:

    - REDDIT_FETCH_BACKEND is set to "urllib" (case-insensitive); the default
      value is "harness", which tries the browser first.
    - ``reddit_browser_fetch`` cannot be imported.
    - the browser call raises, or yields a non-200 status or empty body.

    Every fallback except the explicit env opt-out is noted on stderr.
    """
    backend = os.environ.get("REDDIT_FETCH_BACKEND", "harness")
    if backend.lower() == "urllib":
        return None

    # Imported lazily so urllib-only environments still work.
    try:
        from reddit_browser_fetch import browser_get_json
    except Exception as import_err:
        sys.stderr.write(f"[reddit_tools] browser fetch unavailable ({import_err}); urllib fallback\n")
        return None

    result = None
    try:
        body, status = browser_get_json(url)
        if status == 200 and body:
            result = body
        else:
            sys.stderr.write(f"[reddit_tools] browser fetch status={status} for {url[:80]}; urllib fallback\n")
    except Exception as fetch_err:
        sys.stderr.write(f"[reddit_tools] browser fetch error ({fetch_err}); urllib fallback\n")
    return result
102
+
103
+
104
def _do_request(url):
    """Fetch ``url`` from Reddit and return the decoded JSON payload.

    The reddit-harness browser (see _fetch_via_browser) is tried first. If it
    yields nothing, or a body that is not JSON, the request is repeated with
    urllib. On the urllib path the X-Ratelimit-* response headers are
    persisted via _write_ratelimit, and a 429 is retried once after sleeping
    for the advertised reset plus a 2 second margin.

    Raises:
        RateLimitedError: if the pre-request wait (_wait_if_needed) or the
            429 reset exceeds MAX_INLINE_WAIT_SECONDS.
        urllib.error.HTTPError: for any non-429 HTTP error, or if the single
            retry fails.
    """
    _wait_if_needed()
    # Browser-first (bypasses Reddit's urllib 403 wall). Falls through to urllib
    # if the harness is down or returns a non-200.
    _body = _fetch_via_browser(url)
    if _body is not None:
        try:
            return json.loads(_body)
        except Exception:
            sys.stderr.write(f"[reddit_tools] browser body not JSON for {url[:80]}; urllib fallback\n")
    req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})

    def _open_and_parse():
        # `with` closes the HTTP response even when header parsing or JSON
        # decoding raises; previously the response was never closed.
        with urllib.request.urlopen(req, timeout=20) as resp:
            remaining = float(resp.headers.get("X-Ratelimit-Remaining", 100))
            reset_in = float(resp.headers.get("X-Ratelimit-Reset", 0))
            _write_ratelimit(remaining, reset_in)
            return json.loads(resp.read())

    try:
        return _open_and_parse()
    except urllib.error.HTTPError as e:
        if e.code != 429:
            raise
        reset = float(e.headers.get("X-Ratelimit-Reset", 60))
        _write_ratelimit(0, reset)
        if reset > MAX_INLINE_WAIT_SECONDS:
            raise RateLimitedError(reset)
        print(f"Rate limited. Waiting {int(reset)+2}s...", file=sys.stderr)
        time.sleep(int(reset) + 2)
        # Retry once; a second failure propagates to the caller.
        return _open_and_parse()
142
+
143
+
144
def batch_fetch_info(thing_ids, user_agent=USER_AGENT):
    """Fetch metadata for Reddit thing IDs, 100 per API call.

    Each chunk is requested from old.reddit.com/api/info.json, first through
    the harness browser (see _fetch_via_browser) and then, if that yields
    nothing usable, through urllib.

    Args:
        thing_ids: list of full thing IDs like ["t3_abc123", "t3_def456", "t1_xyz"]
        user_agent: User-Agent header used on the urllib path

    Returns:
        dict mapping thing_id -> post/comment data dict. Children without a
        ``name`` are skipped on both transports (the urllib path used to file
        them under the empty-string key).

    Raises:
        RateLimitedError: if the pre-request wait (_wait_if_needed) or a 429
            reset exceeds MAX_INLINE_WAIT_SECONDS.
        urllib.error.HTTPError: for any non-429 HTTP error, or if the single
            retry after a 429 fails.
    """
    results = {}

    def _collect(payload):
        # Index every named child of an info.json listing by its thing ID.
        for child in payload.get("data", {}).get("children", []):
            info = child.get("data", {})
            name = info.get("name")
            if name:
                results[name] = info

    def _open_and_parse(req):
        # `with` closes the HTTP response even if parsing raises; previously
        # the response was never closed.
        with urllib.request.urlopen(req, timeout=30) as resp:
            remaining = float(resp.headers.get("X-Ratelimit-Remaining", 100))
            reset_in = float(resp.headers.get("X-Ratelimit-Reset", 0))
            _write_ratelimit(remaining, reset_in)
            return json.loads(resp.read())

    # Process in chunks of 100 (Reddit's max per request)
    for start in range(0, len(thing_ids), 100):
        chunk = thing_ids[start:start + 100]
        ids_str = ",".join(chunk)
        url = f"https://old.reddit.com/api/info.json?id={ids_str}"
        _wait_if_needed()
        # Browser-first transport (Reddit 403s urllib on *.json). urllib fallback.
        _body = _fetch_via_browser(url)
        if _body is not None:
            try:
                _collect(json.loads(_body))
                continue
            except Exception:
                sys.stderr.write("[reddit_tools] browser info.json not JSON; urllib fallback\n")
        req = urllib.request.Request(url, headers={"User-Agent": user_agent})
        try:
            data = _open_and_parse(req)
        except urllib.error.HTTPError as e:
            if e.code != 429:
                raise
            reset = float(e.headers.get("X-Ratelimit-Reset", 60))
            _write_ratelimit(0, reset)
            if reset > MAX_INLINE_WAIT_SECONDS:
                raise RateLimitedError(reset)
            print(f"Rate limited. Waiting {int(reset)+2}s...", file=sys.stderr)
            time.sleep(int(reset) + 2)
            # Retry once; a second failure propagates to the caller.
            data = _open_and_parse(req)

        _collect(data)

    return results
203
+
204
+
205
def _ban_entry_to_slug(entry):
    """Extract the sub slug from a comment_blocked / thread_blocked entry.

    Entries are either bare strings (pre-2026-05-11 shape) or audit dicts
    {"sub": "foo", "added_at": ..., "reason": ..., "project": ...}.

    Returns the stripped, lowercased slug, or None when the entry (or its
    "sub" value) is missing, blank, or not a string. Malformed values return
    None instead of raising, so one bad entry cannot abort the caller's loop
    over the whole ban list.
    """
    if isinstance(entry, dict):
        entry = entry.get("sub")
    if not isinstance(entry, str):
        return None
    return entry.strip().lower() or None
219
+
220
+
221
def _load_comment_blocked_subs(project_name=None):
    """Load subreddits where we cannot post comments.

    Reads subreddit_bans.comment_blocked plus exclusions.subreddits from
    ../config.json (relative to this script). Used by search/fetch so the
    comment-drafting agent never sees these subs as candidates in the first
    place.

    subreddit_bans.thread_blocked is NOT read here — a sub can block new
    thread creation while still allowing comments, so it must not leak into
    the comment pipeline.

    Per-project layer (added 2026-05-11): when project_name is provided, also
    pulls active `subreddit:<slug>` excludes from project_search_excludes
    (platform='reddit'). If the project_excludes import or call fails for any
    reason, a warning is printed and the config-derived list is returned
    alone.

    Scope model (2026-05-19 cleanup):
      - comment_blocked entries are account-level. The ONLY scope dimension
        is the entry's `account` field: an entry tagged with a specific
        account is skipped when config.reddit_account.username names a
        different account (case-insensitive). Entries without an account
        apply everywhere, as do all entries when no current account is
        configured.
      - The legacy `project` field on these entries is IGNORED. Project-
        specific relevance rejects live in project_search_excludes (the
        per-project layer above), NOT in comment_blocked.

    Handles both ban-list shapes: bare-string entries (pre-2026-05-11) and
    {"sub": ..., "added_at": ..., "reason": ..., "account": ...} audit dicts.

    Args:
        project_name: optional project whose active subreddit excludes are
            merged into the result.

    Returns:
        set of lowercased subreddit slugs. An empty set (plus a stderr
        warning) when config.json cannot be read or parsed, so a failure
        here never breaks search.
    """
    try:
        config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "config.json")
        with open(config_path) as f:
            config = json.load(f)

        # Per-account scoping (2026-05-15): a ban applies only to the account
        # that triggered it, because different machines may post the same
        # project as different accounts.
        account_cfg = config.get("reddit_account")
        current_account = None
        if isinstance(account_cfg, dict):
            current_account = account_cfg.get("username") or None

        blocked = set()
        bans = config.get("subreddit_bans") or {}
        if isinstance(bans, dict):
            for entry in bans.get("comment_blocked") or []:
                try:
                    slug = _ban_entry_to_slug(entry)
                except (AttributeError, TypeError):
                    # One malformed entry must not void the whole ban list.
                    continue
                if not slug:
                    continue
                entry_account = None
                if isinstance(entry, dict):
                    entry_account = entry.get("account") or None
                # str() keeps a non-string account value from raising and
                # discarding every ban via the outer handler.
                if (entry_account is not None and current_account is not None
                        and str(entry_account).lower() != str(current_account).lower()):
                    continue
                blocked.add(slug)

        # `or {}` / `or []` mirror the subreddit_bans handling above so that
        # "exclusions": null (or a null "subreddits") no longer raises and
        # empties the already-collected ban list.
        exclusions = config.get("exclusions") or {}
        if isinstance(exclusions, dict):
            blocked.update(
                s.lower()
                for s in (exclusions.get("subreddits") or [])
                if isinstance(s, str)
            )

        # Per-project self-improving sub denylist (2026-05-11). Only active
        # terms are returned by active_excludes_by_kind.
        if project_name:
            try:
                import project_excludes as _pe
                split = _pe.active_excludes_by_kind('reddit', project_name)
                for sub in (split.get('subreddit') or []):
                    if sub:
                        blocked.add(sub.lower())
            except Exception as e:
                print(f"[reddit_search] WARN: project_excludes load failed: {e}",
                      file=sys.stderr, flush=True)
        return blocked
    except Exception as e:
        # Still best-effort, but no longer silent: an empty blocklist means
        # banned subs become search candidates again, which should be visible.
        print(f"[reddit_search] WARN: comment-blocked subs load failed: {e}",
              file=sys.stderr, flush=True)
        return set()
316
+
317
+
318
def _load_config_subreddits():
    """Return the ``subreddits`` entry of ../config.json for scoped searches.

    Falls back to an empty list when the key is absent or when the config
    file cannot be located, opened, or parsed.
    """
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(script_dir, "..", "config.json")) as handle:
            settings = json.load(handle)
        return settings.get("subreddits", [])
    except Exception:
        return []
327
+
328
+
329
def _build_search_url(query, sort, limit, time_filter, subreddits=None):
    """Compose the Reddit search.json URL for ``query``.

    The query always gets the " self:yes nsfw:no" filter appended before
    percent-encoding. When ``subreddits`` is non-empty the search is scoped
    to the "+"-joined multi-sub path with restrict_sr=on; otherwise it uses
    the site-wide search endpoint.
    """
    encoded_query = urllib.parse.quote(query + " self:yes nsfw:no")
    params = "&".join((
        f"q={encoded_query}",
        f"sort={sort}",
        f"t={time_filter}",
        f"limit={limit}",
        "type=link",
        "raw_json=1",
    ))
    if not subreddits:
        return f"https://www.reddit.com/search.json?{params}"
    scope = "+".join(subreddits)
    return f"https://www.reddit.com/r/{scope}/search.json?{params}&restrict_sr=on"
339
+
340
+
341
+ def _parse_search_results(data, already_posted, blocked_subs):
342
+ """Parse Reddit search JSON into thread list.
343
+
344
+ Returns (threads, stats) where stats counts the per-reason drops so the
345
+ caller (cmd_search) can emit a `[reddit_search]` marker to stderr that the
346
+ dashboard's reddit-run enricher parses to surface raw/passed/dropped pills
347
+ (mirroring linkedin_search_attempts.candidates_dropped_below_floor and
348
+ twitter_search_attempts.tweets_found, see bin/server.js enrichers).
349
+ """
350
+ threads = []
351
+ stats = {"raw": 0, "blocked_sub": 0, "archived": 0, "locked": 0, "too_old": 0,
352
+ "already_posted_flagged": 0}
353
+ top_score = 0
354
+ top_comments = 0
355
+ for child in data.get("data", {}).get("children", []):
356
+ post = child.get("data", {})
357
+ stats["raw"] += 1
358
+ subreddit = post.get("subreddit", "").lower()
359
+ if subreddit in blocked_subs:
360
+ stats["blocked_sub"] += 1
361
+ continue
362
+ created = post.get("created_utc", 0)
363
+ age_hours = (datetime.now(timezone.utc).timestamp() - created) / 3600 if created else 999
364
+ permalink = f"https://old.reddit.com{post.get('permalink', '')}"
365
+ already = permalink in already_posted
366
+ entry = {
367
+ "subreddit": f"r/{post.get('subreddit', '')}",
368
+ "url": permalink,
369
+ "title": post.get("title", ""),
370
+ "author": post.get("author", ""),
371
+ "score": post.get("score", 0),
372
+ "num_comments": post.get("num_comments", 0),
373
+ "age_hours": round(age_hours, 1),
374
+ "selftext": post.get("selftext", ""),
375
+ "already_posted": already,
376
+ }
377
+ if already:
378
+ # HARD FILTER (added 2026-05-08): drop already-posted threads at parse
379
+ # time. Previously this only attached a SKIP marker to the entry and
380
+ # let it flow through to the LLM gate, which cost ~$0.20/thread to
381
+ # confirm "yep, already posted" — observed 6+/cycle on studyly. The
382
+ # set of permalinks comes from `posts.thread_url` (every comment
383
+ # we've ever landed), so an entry here means we definitely already
384
+ # engaged this thread; posting again = obvious astroturfing.
385
+ stats["already_posted_flagged"] += 1
386
+ continue
387
+ if post.get("archived"):
388
+ stats["archived"] += 1
389
+ continue
390
+ if age_hours > 4320:
391
+ stats["too_old"] += 1
392
+ continue
393
+ if post.get("locked"):
394
+ stats["locked"] += 1
395
+ continue
396
+ if entry["score"] > top_score:
397
+ top_score = entry["score"]
398
+ if entry["num_comments"] > top_comments:
399
+ top_comments = entry["num_comments"]
400
+ threads.append(entry)
401
+ stats["returned"] = len(threads)
402
+ stats["top_score"] = top_score
403
+ stats["top_comments"] = top_comments
404
+ return threads, stats
405
+
406
+
407
def _log_search_and_attach_deltas(query, subreddits_csv, project_name, batch_id, threads, stats):
    """Dual-write feedback loop side effect of cmd_search.

    1. POSTs the returned threads to /api/v1/reddit-thread-snapshots. The
       response's ``data.threads`` entries are matched back by ``url`` and
       their delta_score / delta_comments / delta_window_min / sightings /
       first_seen_at fields (when present) are copied onto the matching
       dicts in ``threads`` — the list is mutated in place so the LLM sees:
         "+15 upvotes / +4 comments since first seen 32min ago"
    2. POSTs ONE /api/v1/reddit-search-attempts row capturing (query,
       subreddits, project, raw count, post-filter count, top metrics). This
       is sent even when no thread survived filtering, so zero-candidate
       queries are still recorded.

    Failures here MUST NOT break the search command. The whole point is to be
    a passive side effect; dropping a snapshot row is preferable to failing the
    whole call and starving the post pipeline. Any exception is reported once
    on stderr and swallowed.

    Args:
        query: The search query string as issued.
        subreddits_csv: Raw comma-separated --subreddits value, or falsy.
        project_name: Project identifier, or falsy.
        batch_id: Batch identifier, or falsy.
        threads: Thread dicts as produced by _parse_search_results (mutated).
        stats: Stats dict as produced by _parse_search_results.
    """
    try:
        # 1) Server computes deltas + persists thread snapshots in one call,
        #    then returns the threads array with the delta fields attached.
        snap_payload = []
        for t in threads:
            if not t.get("url"):
                continue
            sub_name = t.get("subreddit") or ""
            # Remove only a literal "r/" prefix. str.lstrip("r/") strips a
            # *character set*, which mangled names such as "r/rust" -> "ust".
            if sub_name.startswith("r/"):
                sub_name = sub_name[2:]
            snap_payload.append({
                "url": t.get("url"),
                "score": int(t.get("score") or 0),
                "num_comments": int(t.get("num_comments") or 0),
                "subreddit": sub_name,
                "title": (t.get("title") or "")[:500],
            })
        if snap_payload:
            resp = api_post(
                "/api/v1/reddit-thread-snapshots",
                {"threads": snap_payload},
            )
            enriched = ((resp or {}).get("data") or {}).get("threads") or []
            by_url = {e.get("url"): e for e in enriched if e.get("url")}
            for t in threads:
                u = t.get("url")
                if not u:
                    continue
                e = by_url.get(u)
                if not e:
                    continue
                for k in ("delta_score", "delta_comments", "delta_window_min",
                          "sightings", "first_seen_at"):
                    if k in e:
                        t[k] = e[k]

        # 2) One row per query attempt
        api_post(
            "/api/v1/reddit-search-attempts",
            {
                "query": query,
                "subreddits": subreddits_csv or None,
                "project_name": project_name or None,
                "candidates_raw": int(stats.get("raw") or 0),
                "candidates_post_filter": int(stats.get("returned") or 0),
                "top_score": int(stats.get("top_score") or 0),
                "top_comments": int(stats.get("top_comments") or 0),
                "batch_id": batch_id or None,
            },
        )
    except Exception as e:
        # Side-effect-only logging: never raise. Print once to stderr so
        # the run log shows the failure without breaking the search.
        print(f"[reddit_search] WARN: feedback log failed: {e}", file=sys.stderr, flush=True)
478
+
479
+
480
def cmd_search(args):
    """Search Reddit and return threads as JSON.

    Uses sort=relevance by default for topically relevant results.
    Supports --subreddits to scope search to specific subs via restrict_sr.
    Supports --time to filter by recency (hour, day, week, month, year, all).

    Side effects:
    - Logs one search-attempt row per call (project + batch_id are pulled
      from the S4L_REDDIT_PROJECT / S4L_REDDIT_BATCH_ID env vars so the tool
      signature stays unchanged).
    - Sends one snapshot per returned thread; delta_score / delta_comments /
      delta_window_min may be attached to each thread in the stdout JSON.
    - Emits a single-line `[reddit_search]` stats marker on stderr.
    - When S4L_REDDIT_DUMP_DIR names an existing directory, the threads are
      written to a unique file there and only a one-line count goes to stdout.

    Args:
        args: argparse namespace with ``query``, ``time``, ``sort``,
            ``limit`` and ``subreddits`` (comma-separated string or None).
    """
    query = args.query
    time_filter = args.time

    # Load already-posted URLs for filtering via /api/v1/posts/thread-urls.
    # Scope per-account so machines running different Reddit identities
    # don't skip threads on each other's behalf. Falls back to unscoped when
    # the resolver can't pin a handle (legacy single-machine behavior).
    _reddit_account = _resolve_account("reddit")
    _probe_q = {"platform": "reddit"}
    if _reddit_account:
        _probe_q["our_account"] = _reddit_account
    try:
        resp = api_get("/api/v1/posts/thread-urls", query=_probe_q)
        urls = ((resp or {}).get("data") or {}).get("thread_urls") or []
        already_posted = {u for u in urls if u}
    except Exception as e:
        print(f"[reddit_search] WARN: thread-urls fetch failed: {e}", file=sys.stderr)
        already_posted = set()

    # Read project env BEFORE building the blocked-subs set so per-project
    # excludes layer onto the global denylist. The same env var is reused
    # below for the feedback-log side effect.
    project_env = os.environ.get("S4L_REDDIT_PROJECT") or None
    batch_env = os.environ.get("S4L_REDDIT_BATCH_ID") or None

    # Compute global vs project-augmented denylist sizes so the stderr marker
    # below shows how much of the block bucket came from the per-project
    # layer.
    blocked_subs_global = _load_comment_blocked_subs(project_name=None)
    blocked_subs = _load_comment_blocked_subs(project_name=project_env)
    project_block_extra = len(blocked_subs) - len(blocked_subs_global)

    # Determine subreddit scoping. Only a literal "r/" (or "/r/") prefix is
    # removed: str.lstrip("r/") strips a *character set* and used to mangle
    # names such as "rust" -> "ust" or "r/reactjs" -> "eactjs". Surrounding
    # whitespace and empty items (e.g. "a, b," ) are dropped so they cannot
    # leak into the URL path.
    target_subs = None
    if args.subreddits:
        target_subs = []
        for raw_sub in args.subreddits.split(","):
            name = raw_sub.strip()
            for prefix in ("/r/", "r/"):
                if name.startswith(prefix):
                    name = name[len(prefix):]
                    break
            if name:
                target_subs.append(name)

    url = _build_search_url(query, args.sort, args.limit, time_filter, subreddits=target_subs)
    data = _do_request(url)
    threads, stats = _parse_search_results(data, already_posted, blocked_subs)
    stats["project_block_extra"] = project_block_extra
    _log_search_and_attach_deltas(
        query, args.subreddits, project_env, batch_env, threads, stats,
    )

    # Emit a single-line marker on stderr with the raw/passed/dropped counts.
    # Stdout JSON contract is extended with delta_* keys per thread
    # (additive).
    safe_q = query.replace('"', '\\"')[:120]
    print(
        f'[reddit_search] q="{safe_q}" raw={stats["raw"]} returned={stats["returned"]} '
        f'blocked_sub={stats["blocked_sub"]} archived={stats["archived"]} '
        f'locked={stats["locked"]} too_old={stats["too_old"]} '
        f'already_posted_flagged={stats["already_posted_flagged"]} '
        f'top_score={stats["top_score"]} top_comments={stats["top_comments"]} '
        f'project_block_extra={stats.get("project_block_extra", 0)}',
        file=sys.stderr, flush=True,
    )

    # Opaque-results discover mode: when S4L_REDDIT_DUMP_DIR is set, write
    # the full threads JSON to a unique file in that directory and print ONLY
    # a one-line summary to stdout, so the tool's caller never sees thread
    # content on stdout.
    dump_dir = os.environ.get("S4L_REDDIT_DUMP_DIR")
    if dump_dir and os.path.isdir(dump_dir):
        import tempfile as _tempfile
        fd, dump_path = _tempfile.mkstemp(
            dir=dump_dir, prefix="result-", suffix=".json"
        )
        try:
            with os.fdopen(fd, "w") as df:
                json.dump({"query": query, "threads": threads, "stats": stats}, df)
        except Exception as e:
            # If dump fails, fall back to stdout so the cycle isn't silently broken.
            print(f"[reddit_search] WARN: dump failed, falling back to stdout: {e}",
                  file=sys.stderr, flush=True)
            print(json.dumps(threads, indent=2))
            return
        # Report only the count, not the content, and deliberately no file
        # path. The stderr [reddit_search] line above already gives the full
        # breakdown (raw/returned/blocked/etc.).
        print(f"OK: {stats['returned']} threads passed to ripen pipeline (results not shown)")
        return

    print(json.dumps(threads, indent=2))
590
+
591
+
592
def _html_postable_check(thread_url):
    """Second-opinion check against old.reddit.com HTML.

    Reddit's JSON `locked` and `archived` flags sometimes miss HTML-only
    lock states (seen where AutoMod renders `.locked-tagline` on the thread
    page while the JSON payload reports `locked=false`).

    The page is fetched browser-first via `_fetch_via_browser`; when that
    returns None, a plain urllib GET is used and its rate-limit headers are
    recorded via `_write_ratelimit`.

    Args:
        thread_url: Thread URL; a `www.reddit.com` host is rewritten to
            `old.reddit.com`.

    Returns:
        "locked", "archived", or "ok"; None when anything raises (network
        error, bad headers, ...). "ok" is also returned when the comment
        area cannot be located in the page.
    """
    import re as _re
    try:
        url = thread_url.replace("www.reddit.com", "old.reddit.com").rstrip("/") + "/"
        _wait_if_needed()
        # Browser-first transport (Reddit 403s urllib). urllib fallback below.
        html = _fetch_via_browser(url)
        if html is None:
            req = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
            # Context manager guarantees the HTTP response is closed even if
            # header parsing or decoding raises.
            with urllib.request.urlopen(req, timeout=15) as resp:
                remaining = float(resp.headers.get("X-Ratelimit-Remaining", 100))
                reset = float(resp.headers.get("X-Ratelimit-Reset", 0))
                _write_ratelimit(remaining, reset)
                html = resp.read().decode("utf-8", errors="ignore")
        # Scope the lock check to the post header only. Some subs sticky an
        # AutoMod comment that is itself locked, rendering
        # `<span class="locked-tagline">locked comment</span>` inside the
        # `.commentarea` div. Matching against that would flag every thread
        # in the sub, so slice to the prefix before the comments section.
        ca_idx = html.find('class="commentarea"')
        if ca_idx < 0:
            # Couldn't isolate the header (empty thread, stripped page, etc.).
            # Report "ok" rather than fail-closing every thread on a layout
            # edge case; the JSON flags remain the primary signal.
            return "ok"
        header_html = html[:ca_idx]
        # Match only the tagline CSS classes, not the archived-popup template
        # that old.reddit.com preloads on every page.
        if _re.search(r'class="[^"]*\blocked-tagline\b', header_html):
            return "locked"
        if _re.search(r'class="[^"]*\barchived-tagline\b', header_html):
            return "archived"
        return "ok"
    except Exception:
        # Best-effort probe: callers treat None as "could not determine".
        return None
640
+
641
+
642
def cmd_fetch(args):
    """Fetch a thread and its top comments via Reddit's JSON API.

    Prints exactly one JSON document to stdout:
    - {"error": "subreddit_blocked", ...} when the URL's subreddit is in the
      blocked set (per-project excludes honored via S4L_REDDIT_PROJECT);
    - {"error": "unexpected response format"} when the payload is not the
      expected two-listing shape or the thread listing is empty/malformed;
    - {"error": "thread_archived" | "thread_locked", "thread": ...} from the
      JSON flags, or with "detected_via": "html" from the HTML second check;
    - {"thread": ..., "comments": [...]} otherwise (up to 15 top-level
      children considered, only kind "t1" kept).

    Args:
        args: argparse namespace with ``url`` (thread URL).
    """
    # Check if subreddit is blocked. Honors per-project excludes via the
    # S4L_REDDIT_PROJECT env var (same shape as cmd_search).
    import re as _re
    sub_match = _re.search(r'/r/([^/]+)', args.url)
    if sub_match:
        project_env = os.environ.get("S4L_REDDIT_PROJECT") or None
        blocked = _load_comment_blocked_subs(project_name=project_env)
        if sub_match.group(1).lower() in blocked:
            print(json.dumps({"error": "subreddit_blocked", "subreddit": sub_match.group(1)}))
            return

    # Convert URL to .json endpoint (works for old.reddit.com or www.reddit.com)
    url = args.url.rstrip("/")
    if not url.endswith(".json"):
        url = url + ".json"
    url = url + "?limit=20&sort=top"

    data = _do_request(url)

    if not isinstance(data, list) or len(data) < 2:
        print(json.dumps({"error": "unexpected response format"}))
        return

    # Thread info. A listing with no children (or a differently shaped
    # payload) is reported as a structured error instead of crashing with an
    # uncaught IndexError/KeyError/TypeError.
    try:
        thread_data = data[0]["data"]["children"][0]["data"]
        comment_children = data[1]["data"]["children"]
    except (KeyError, IndexError, TypeError):
        print(json.dumps({"error": "unexpected response format"}))
        return

    thread = {
        "title": thread_data.get("title", ""),
        "author": thread_data.get("author", ""),
        "selftext": thread_data.get("selftext", ""),
        "score": thread_data.get("score", 0),
        "num_comments": thread_data.get("num_comments", 0),
        "subreddit": f"r/{thread_data.get('subreddit', '')}",
        "url": args.url,
    }

    if thread_data.get("archived") or thread_data.get("locked"):
        status = "archived" if thread_data.get("archived") else "locked"
        print(json.dumps({"error": f"thread_{status}", "thread": thread}))
        return

    # Second opinion from the HTML page; a None result (probe failed) is
    # treated the same as "ok" here.
    html_state = _html_postable_check(args.url)
    if html_state in ("locked", "archived"):
        print(json.dumps({"error": f"thread_{html_state}", "thread": thread,
                          "detected_via": "html"}))
        return

    # Top comments (flatten one level)
    comments = []
    for child in comment_children[:15]:
        if child.get("kind") != "t1":
            continue
        c = child.get("data", {})
        comment = {
            "id": c.get("name", ""),  # full thing ID like t1_abc123
            "author": c.get("author", ""),
            "body": c.get("body", ""),
            "score": c.get("score", 0),
            "permalink": f"https://old.reddit.com{c.get('permalink', '')}",
        }
        comments.append(comment)

    print(json.dumps({"thread": thread, "comments": comments}, indent=2))
709
+
710
+
711
def cmd_repoll(args):
    """Re-fetch the current score and comment count for a batch of thread URLs.

    Input (stdin JSON):  {"urls": ["https://old.reddit.com/r/.../comments/.../...", ...]}
    Output (stdout JSON): {"results": {"<url>": {"ok": true, "score": N, "comments": M}
                                              | {"ok": false, "error": "..."}}}

    Empty stdin prints an empty result map. Undecodable stdin prints a
    ``bad_json`` error and exits with status 1. Per-URL failures (network,
    rate limit, locked/archived thread, unexpected payload) are reported with
    ok=false for that URL only, so the caller can drop those candidates.
    """
    import re as _re

    def _poll(thread_url):
        # Produce the result entry for a single URL; never raises.
        try:
            endpoint = thread_url.rstrip("/")
            if not endpoint.endswith(".json"):
                endpoint = endpoint + ".json"
            data = _do_request(endpoint + "?limit=1&sort=top")
            if not isinstance(data, list) or len(data) < 1:
                return {"ok": False, "error": "unexpected_response"}
            td = data[0]["data"]["children"][0]["data"]
            # JSON-level locks/archives are rejected before reporting ok=True.
            # HTML-only locks are not detected here (see _html_postable_check).
            if td.get("locked"):
                return {"ok": False, "error": "thread_locked"}
            if td.get("archived"):
                return {"ok": False, "error": "thread_archived"}
            return {
                "ok": True,
                "score": int(td.get("score") or 0),
                "comments": int(td.get("num_comments") or 0),
            }
        except RateLimitedError as e:
            return {"ok": False, "error": f"rate_limited:{int(e.reset_seconds)}"}
        except Exception as e:
            return {"ok": False, "error": f"{type(e).__name__}:{str(e)[:80]}"}

    stdin_text = sys.stdin.read().strip()
    if not stdin_text:
        print(json.dumps({"results": {}}))
        return
    try:
        payload = json.loads(stdin_text)
    except json.JSONDecodeError as e:
        print(json.dumps({"error": f"bad_json: {e}"}))
        sys.exit(1)

    results = {}
    for url in payload.get("urls") or []:
        results[url] = _poll(url)
    print(json.dumps({"results": results}))
765
+
766
+
767
def cmd_check_locked(args):
    """Run the HTML-based lock/archive probe for one thread URL.

    Delegates to _html_postable_check(args.url) and prints
    {"url": "...", "state": "ok"|"locked"|"archived"|"error"} to stdout;
    a falsy probe result is reported as "error".
    """
    verdict = _html_postable_check(args.url)
    if not verdict:
        verdict = "error"
    print(json.dumps({"url": args.url, "state": verdict}))
778
+
779
+
780
def cmd_already_posted(args):
    """Report whether a post already exists for args.url via /api/v1/posts/lookup.

    The lookup is scoped to the account returned by _resolve_account("reddit")
    when one is available, so different Reddit identities don't see each
    other's posts as their own; otherwise the lookup is unscoped.

    Prints {"already_posted": true, "post_id": ..., "content_preview": ...}
    when the response carries a post, else {"already_posted": false}.
    """
    lookup = {"platform": "reddit", "thread_url": args.url}
    handle = _resolve_account("reddit")
    if handle:
        lookup["our_account"] = handle

    reply = api_get("/api/v1/posts/lookup", query=lookup)
    existing = ((reply or {}).get("data") or {}).get("post")
    if not existing:
        print(json.dumps({"already_posted": False}))
        return

    summary = {
        "already_posted": True,
        "post_id": existing.get("id"),
        "content_preview": existing.get("our_content"),
    }
    print(json.dumps(summary))
802
+
803
+
804
def cmd_log_post(args):
    """Record a posted comment by POSTing to /api/v1/posts.

    The request is made with ok_on_conflict=True; when the response body
    carries an ``error`` entry, a DUPLICATE_THREAD envelope is printed
    (with existing_post_id / content_preview taken from the error details).
    Otherwise a {"logged": true, ...} envelope is printed.

    Optional inputs:
    - --generation-trace: path to a JSON file, loaded and sent as
      ``generation_trace``. Read/decode failures only warn on stderr and the
      field is omitted; the post is still logged.
    - --link-source: forwarded as ``link_source`` when truthy.
    - ``autoposter_version`` is added when read_autoposter_version() returns
      a truthy value.
    """
    claude_session = os.environ.get("CLAUDE_SESSION_ID") or None

    # The trace is passed as a file path rather than inline. A failed load
    # leaves the blob as None and never blocks the insert.
    trace_blob = None
    trace_path = getattr(args, "generation_trace", None)
    if trace_path:
        try:
            with open(trace_path, "r", encoding="utf-8") as tf:
                trace_blob = json.load(tf)
        except (OSError, json.JSONDecodeError) as e:
            # Stderr only: stdout carries the JSON envelope printed below.
            print(f"WARNING: could not load generation_trace {trace_path}: {e}",
                  file=sys.stderr)

    payload = {
        "platform": "reddit",
        "thread_url": args.thread_url,
        "thread_author": args.thread_author,
        "thread_title": args.thread_title,
        "our_url": args.our_url,
        "our_content": args.our_text,
        "our_account": args.account,
        "project": args.project,
        "engagement_style": getattr(args, "engagement_style", None),
        "search_topic": getattr(args, "search_topic", None),
        "claude_session_id": claude_session,
        "language": None,
        "is_recommendation": False,
    }
    if trace_blob is not None:
        payload["generation_trace"] = trace_blob

    # Optional tag (e.g. 'audience_page:founder-ghostwriting') describing
    # which kind of link the reply text carried.
    link_source = getattr(args, "link_source", None)
    if link_source:
        payload["link_source"] = args.link_source

    # Version of the autoposter at posting time; omitted when unavailable.
    version = read_autoposter_version()
    if version:
        payload["autoposter_version"] = version

    reply = api_post("/api/v1/posts", payload, ok_on_conflict=True)

    failure = reply.get("error") if isinstance(reply, dict) else None
    if failure:
        details = {}
        if isinstance(failure, dict):
            details = failure.get("details") or {}
        print(json.dumps({
            "error": "DUPLICATE_THREAD",
            "message": "Already posted in this thread",
            "existing_post_id": details.get("existing_post_id"),
            "content_preview": details.get("content_preview"),
        }))
        return

    created = ((reply or {}).get("data") or {}).get("post") or {}
    print(json.dumps({
        "logged": True,
        "post_id": created.get("id"),
        "claude_session_id": claude_session,
    }))
875
+
876
+
877
def main():
    """CLI entry point: parse the subcommand and dispatch to its cmd_* handler.

    Subcommands: search, fetch, repoll, check-locked, already-posted,
    log-post. With no subcommand the help text is printed. A
    RateLimitedError raised by a handler is turned into a ``rate_limited``
    JSON error on stdout and exit status 2.
    """
    parser = argparse.ArgumentParser(description="Reddit tools for Claude")
    commands = parser.add_subparsers(dest="command")

    # search
    search_p = commands.add_parser("search", help="Search Reddit for threads")
    search_p.add_argument("query", help="Search query")
    search_p.add_argument("--limit", type=int, default=15, help="Max results")
    search_p.add_argument("--sort", default="relevance", help="Sort order (relevance, new, hot, top, comments)")
    search_p.add_argument("--time", default="week", help="Time filter (hour, day, week, month, year, all)")
    search_p.add_argument("--subreddits", default=None, help="Comma-separated subreddits to scope search (e.g. AI_Agents,SaaS,smallbusiness)")

    # fetch
    fetch_p = commands.add_parser("fetch", help="Fetch thread + comments")
    fetch_p.add_argument("url", help="Thread URL")

    # repoll (takes its input on stdin, so no arguments)
    commands.add_parser("repoll", help="Re-fetch score/comments for a list of thread URLs (JSON on stdin)")

    # check-locked (HTML-based lock check)
    locked_p = commands.add_parser("check-locked", help="Check if a thread is locked via old.reddit.com HTML")
    locked_p.add_argument("url", help="Thread URL")

    # already-posted
    posted_p = commands.add_parser("already-posted", help="Check if already posted in thread")
    posted_p.add_argument("url", help="Thread URL")

    # log-post: six positionals, in this exact order, then options
    log_p = commands.add_parser("log-post", help="Log a posted comment to DB")
    for positional in ("thread_url", "our_url", "our_text", "project",
                       "thread_author", "thread_title"):
        log_p.add_argument(positional)
    log_p.add_argument("--account", default="Deep_Ad1959")
    log_p.add_argument("--engagement-style", default=None)
    log_p.add_argument("--search-topic", dest="search_topic", default=None,
                       help="The seed topic/query used to find this thread (feedback loop input)")
    log_p.add_argument("--generation-trace", dest="generation_trace", default=None,
                       help="Path to a JSON file with the few-shot context Claude "
                            "saw before drafting (top_performers report, recent "
                            "comments, model, prompt size). Stored in "
                            "posts.generation_trace JSONB for audit. See "
                            "migrations/2026-05-12_generation_trace.sql for the "
                            "shape contract.")
    log_p.add_argument("--link-source", dest="link_source", default=None,
                       help="Optional tag for posts.link_source so the dashboard "
                            "can break out audience-page traffic (e.g. "
                            "'audience_page:founder-ghostwriting') from generic "
                            "homepage links.")

    args = parser.parse_args()

    # Subcommand name -> handler.
    handlers = {
        "search": cmd_search,
        "fetch": cmd_fetch,
        "repoll": cmd_repoll,
        "check-locked": cmd_check_locked,
        "already-posted": cmd_already_posted,
        "log-post": cmd_log_post,
    }
    try:
        handler = handlers.get(args.command)
        if handler is None:
            parser.print_help()
        else:
            handler(args)
    except RateLimitedError as e:
        # Clean JSON error so the calling agent can skip and try another action.
        print(json.dumps({
            "error": "rate_limited",
            "wait_seconds": int(e.reset_seconds),
            "message": f"Reddit API rate limit hit. Skip this query and try a different topic or command.",
        }))
        sys.exit(2)
953
+
954
+
955
+ if __name__ == "__main__":
956
+ main()