@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,327 @@
1
+ #!/usr/bin/env python3
2
+ """Scan Twitter notifications via the browser (no API cost) and insert new replies.
3
+
4
+ Browser-based replacement for the old API-powered scan_twitter_mentions.py.
5
+ Consumes JSON from `twitter_browser.py notifications [scroll] [tab]` which
6
+ defaults to the /notifications (All) tab so we catch nested replies where the
7
+ @-tag was dropped. Pass tab="mentions" to restrict to explicit @-mentions only.
8
+ Companion: scan_twitter_thread_followups.py revisits our recent replies to
9
+ pick up depth-2+ follow-ups that never surface in notifications at all.
10
+
11
+ Usage:
12
+ python3 scripts/twitter_browser.py notifications 8 all > /tmp/twitter_notifs.json
13
+ python3 scripts/scan_twitter_mentions_browser.py --json-file /tmp/twitter_notifs.json
14
+
15
+ Migrated 2026-05-18: reads/writes go through s4l.ai HTTP API (/api/v1/posts,
16
+ /api/v1/posts/lookup, /api/v1/replies) via scripts/http_api.py instead of
17
+ psycopg2. Note: the route enforces (platform, their_comment_id) uniqueness
18
+ server-side, so the "existing_ids" prefetch is now a soft local cache used
19
+ to short-circuit the POST loop; we still rely on the API's ON CONFLICT path
20
+ as the source of truth.
21
+
22
+ Migrated 2026-05-23: third-party mentions now write to the dedicated
23
+ `mentions` table via /api/v1/mentions instead of a placeholder row in
24
+ `posts`. The associated reply row carries `mention_id` instead of
25
+ `post_id`, enforced by the replies_post_or_mention_exclusive_check DB
26
+ constraint. See migrations/2026-05-23-mentions-table.sql and
27
+ scripts/migrate_mentions_out_of_posts.py for the cutover history.
28
+ """
29
+
30
+ import argparse
31
+ import json
32
+ import os
33
+ import re
34
+ import sys
35
+
36
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
37
+ from http_api import api_get, api_post # noqa: E402
38
+ from project_topics import topics_for_project # noqa: E402
39
try:
    from account_resolver import resolve as _resolve_account  # noqa: E402
except Exception:
    # Best-effort import: if the resolver cannot be loaded for any reason,
    # substitute a stub that reports "no account" so the guard further down
    # stops the script instead of guessing a handle.
    def _resolve_account(_platform):  # type: ignore[unused-arg]
        """Stand-in resolver that never finds a configured account."""
        return None

# Location of the local autoposter configuration file.
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")

# Notifications whose text has fewer words than this are ignored.
MIN_WORDS = 3

# Handle of the account we scan and attribute under.
OUR_HANDLE = _resolve_account("twitter")
if not OUR_HANDLE:
    # Deliberately no hardcoded fallback handle: running under a default
    # would silently attribute activity to the repo owner's account. Fail
    # loudly so the missing configuration gets noticed.
    _missing_handle_msg = (
        "[scan_twitter_mentions] no Twitter handle configured "
        "(accounts.twitter.handle / AUTOPOSTER_TWITTER_HANDLE); refusing to run "
        "to avoid wrong-account attribution. Run connect_x first.\n"
    )
    sys.stderr.write(_missing_handle_msg)
    raise SystemExit(1)

# Sizing for the replies dedup prefetch. 500 is the per-call cap inside
# /api/v1/replies. REPLY_MAX_PAGES is headroom reserved for paginated
# prefetching (200 pages x 500 rows = 100k rows); the route exposes no id
# cursor today, so in practice only a single page is read.
REPLY_PAGE_LIMIT = 500
REPLY_MAX_PAGES = 200
62
+
63
+
64
def load_config():
    """Load the autoposter configuration from CONFIG_PATH.

    Returns:
        The parsed JSON document, or an empty dict when the config file
        does not exist.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
        OSError: for failures other than a missing file (for example an
            unreadable file).
    """
    try:
        # Open directly instead of checking os.path.exists() first: the
        # check-then-open sequence can race with the file being removed.
        # JSON is UTF-8 by specification, so do not depend on the locale's
        # default encoding.
        with open(CONFIG_PATH, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Missing file (or missing parent path) means "no config".
        return {}
69
+
70
+
71
def word_count(text):
    """Return the number of whitespace-separated words in *text*.

    Empty or missing text (``""`` / ``None``) counts as zero words.
    """
    if not text:
        return 0
    words = text.split()
    return len(words)
73
+
74
+
75
def get_existing_reply_ids():
    """Fetch a local dedup cache of replies.their_comment_id for platform=x.

    Exactly one page of at most REPLY_PAGE_LIMIT rows is requested from
    /api/v1/replies (ordered by id). The route offers no "id <" cursor, and
    its `since` filter acts on discovered_at, so there is no correct way to
    walk further pages today. The cache is therefore a soft short-circuit
    for the insert loop, not a complete view of the table: the server-side
    uniqueness on (platform, their_comment_id) remains the canonical dedup,
    and a row missing from this cache simply comes back as a conflict from
    the POST.

    Returns:
        A set of their_comment_id values; rows without one are skipped.
    """
    resp = api_get(
        "/api/v1/replies",
        query={
            "platform": "x",
            "limit": REPLY_PAGE_LIMIT,
            "order_by": "id",
        },
    )
    rows = (resp.get("data") or {}).get("replies") or []
    comment_ids = (row.get("their_comment_id") for row in rows)
    return {cid for cid in comment_ids if cid}
115
+
116
+
117
def get_our_posts():
    """Index our active twitter posts by tweet id.

    Requests up to 500 active twitter posts from /api/v1/posts and keys each
    row by the numeric id following ``/status/`` in its ``our_url``. Rows
    with no URL, or whose URL carries no status id, are left out. When two
    rows share a tweet id, the later row in the response wins.

    Returns:
        dict mapping tweet id (str) -> post row (dict).
    """
    payload = api_get(
        "/api/v1/posts",
        query={"platform": "twitter", "status": "active", "limit": 500},
    )
    by_tweet_id = {}
    for post in (payload.get("data") or {}).get("posts") or []:
        our_url = post.get("our_url")
        found = re.search(r"/status/(\d+)", our_url) if our_url else None
        if found is not None:
            by_tweet_id[found.group(1)] = post
    return by_tweet_id
133
+
134
+
135
def guess_project(text, config):
    """Guess which configured project a piece of text is about.

    Projects are tried in config order. For each project, its topics (from
    topics_for_project) are checked first, then the project name itself;
    every check is a case-insensitive substring match against *text*.

    Args:
        text: The mention text; ``None`` is treated as empty.
        config: Parsed config dict; reads ``projects`` (list of dicts with a
            ``name``) and ``default_project``.

    Returns:
        The name of the first matching project, otherwise
        ``config["default_project"]``, otherwise ``"General"``.
    """
    haystack = (text or "").lower()
    for project in config.get("projects") or []:
        name = project.get("name") or ""
        if not name:
            # The empty string is a substring of every string, so a project
            # without a name would "match" any text and be returned as "".
            continue
        # DB-backed seed list (post 2026-05-27 config.json removal).
        for topic in topics_for_project(name) or ():
            # Skip empty topics for the same always-matches reason.
            if topic and topic.lower() in haystack:
                return name
        if name.lower() in haystack:
            return name
    return config.get("default_project", "General")
148
+
149
+
150
def most_recent_active_project():
    """Return the project_name of the first active twitter post that has one.

    Serves as the fallback project for replies-to-us: the notification feed
    does not expose the parent tweet id, so the specific post a reply sits
    under cannot be identified, and recency is a stronger signal than
    keyword-matching a very short reply body.

    Up to 50 active twitter posts are requested from /api/v1/posts and the
    first row carrying a non-empty project_name is used.
    NOTE(review): "most recent" relies on the route returning newest posts
    first; no ordering is requested here - confirm against the route.

    Since 2026-05-23 the "(mention - no original post)" placeholder rows
    live in `mentions` rather than `posts`, so no filtering of them is
    needed.

    Returns:
        The project name, or ``None`` when no returned post has one.
    """
    payload = api_get(
        "/api/v1/posts",
        query={
            "platform": "twitter",
            "status": "active",
            "limit": 50,
        },
    )
    posts = (payload.get("data") or {}).get("posts") or []
    project_names = (post.get("project_name") for post in posts)
    return next((name for name in project_names if name), None)
177
+
178
+
179
def _mention_id_from_response(mention_resp):
    """Extract the mention id from a /api/v1/mentions POST response.

    Prefers ``data.mention.id``. When that is absent and the response has an
    error (the conflict path), falls back to
    ``error.details.existing_mention_id`` and then
    ``error.details.mention.id``. Returns a falsy value when none is found.
    """
    row = (mention_resp.get("data") or {}).get("mention") or {}
    mention_id = row.get("id")
    if mention_id or not mention_resp.get("error"):
        return mention_id
    details = (mention_resp.get("error") or {}).get("details") or {}
    return details.get("existing_mention_id") or (details.get("mention") or {}).get("id")


def process_notifications(notifications, config):
    """Filter scraped notifications and record the new ones via the API.

    Each notification is skipped (and counted) when it has no tweet id, is
    already in the local dedup cache, comes from our own or an excluded
    account, or has fewer than MIN_WORDS words. Survivors are written as a
    row in /api/v1/mentions followed by a pending row in /api/v1/replies
    that references the mention.

    Args:
        notifications: Iterable of dicts; reads ``tweet_id``, ``handle``,
            ``text``, ``tweet_url`` and ``replying_to``.
        config: Parsed config dict; reads ``exclusions.twitter_accounts``
            and whatever guess_project() needs.

    Returns:
        dict of counters: ``new``, ``already_tracked``, ``excluded_author``,
        ``own_account``, ``too_short``, ``no_tweet_id``.
    """
    # Notification handles are compared with any leading "@" stripped, so
    # normalise our handle and the configured exclusions the same way;
    # otherwise an "@name" entry could never match.
    our_handle = OUR_HANDLE.lstrip("@").lower()
    configured = (config.get("exclusions") or {}).get("twitter_accounts") or []
    excluded_accounts = {(a or "").lstrip("@").lower() for a in configured}
    excluded_accounts.discard("")
    excluded_accounts.add(our_handle)

    existing_ids = get_existing_reply_ids()
    our_posts = get_our_posts()
    recent_project = most_recent_active_project()

    stats = {
        "new": 0,
        "already_tracked": 0,
        "excluded_author": 0,
        "own_account": 0,
        "too_short": 0,
        "no_tweet_id": 0,
    }

    for n in notifications:
        tweet_id = n.get("tweet_id", "")
        handle = (n.get("handle") or "").lstrip("@")
        handle_key = handle.lower()
        text = n.get("text") or ""
        tweet_url = n.get("tweet_url") or (
            f"https://x.com/{handle}/status/{tweet_id}" if handle and tweet_id else ""
        )
        replying_to = (n.get("replying_to") or "").lstrip("@").lower()

        if not tweet_id:
            stats["no_tweet_id"] += 1
            continue

        if tweet_id in existing_ids:
            stats["already_tracked"] += 1
            continue

        if handle_key in excluded_accounts:
            bucket = "own_account" if handle_key == our_handle else "excluded_author"
            stats[bucket] += 1
            continue

        if word_count(text) < MIN_WORDS:
            stats["too_short"] += 1
            continue

        # Resolve project for the mention. Reply-to-us inherits the project
        # of our most recent active post (short reply text is unreliable for
        # keyword matching); other mentions fall back to keyword guess.
        # our_posts is only used as an "we have active posts" signal here:
        # notifications don't expose conversation_id, so there is no direct
        # post_id linkage and attribution goes through the mentions table.
        is_reply_to_us = replying_to == our_handle and bool(our_posts)
        if is_reply_to_us and recent_project:
            project = recent_project
        else:
            project = guess_project(text, config)

        # Insert into /api/v1/mentions. Dedup on (platform, mentioning_url)
        # - if the row already exists we get back existing_mention_id from
        # the 409 body via ok_on_conflict.
        mention_body = {
            "platform": "twitter",
            "mentioning_url": tweet_url,
            "mentioning_handle": handle,
            "mentioning_text": text,
            "our_handle": OUR_HANDLE,
            "project": project,
            "status": "active",
        }
        mention_resp = api_post(
            "/api/v1/mentions", mention_body, ok_on_conflict=True,
        )
        mention_id = _mention_id_from_response(mention_resp)
        if not mention_id:
            print(
                f"  WARNING: could not resolve mention_id for {tweet_url!r}; skipping",
                file=sys.stderr,
            )
            continue

        reply_resp = api_post(
            "/api/v1/replies",
            {
                "mention_id": mention_id,
                "platform": "x",
                "their_comment_id": tweet_id,
                "their_author": handle,
                "their_content": text,
                "their_comment_url": tweet_url,
                "depth": 1,
                "status": "pending",
                "our_account": OUR_HANDLE,
            },
            ok_on_conflict=True,
        )
        # 409 means the row already existed under the server-side UNIQUE
        # (platform, their_comment_id) constraint; count it as already_tracked
        # rather than new so the summary matches reality.
        if (reply_resp.get("error") or {}).get("code") == "duplicate_reply":
            stats["already_tracked"] += 1
        else:
            stats["new"] += 1
            print(f"  NEW: @{handle}: {text[:80]}")
        # Either way the id is now known server-side; remember it so a
        # repeat of the same tweet later in this batch short-circuits.
        existing_ids.add(tweet_id)

    return stats
290
+
291
+
292
def main():
    """CLI entry point: ingest a notifications JSON file and print a summary.

    Reads the file given by ``--json-file`` (output of
    ``twitter_browser.py notifications``). The payload may be either a dict
    with a ``notifications`` list or a bare list. A dict carrying a truthy
    ``error`` key is treated as an extractor failure: the error is printed
    to stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Process Twitter notification data from browser scanner"
    )
    parser.add_argument(
        "--json-file",
        required=True,
        help="Path to JSON from twitter_browser.py notifications",
    )
    args = parser.parse_args()

    # JSON is UTF-8 by specification; don't depend on the locale default.
    with open(args.json_file, encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, dict):
        if data.get("error"):
            print(f"ERROR from extractor: {data['error']}", file=sys.stderr)
            sys.exit(1)
        # `or []` also covers an explicit `"notifications": null`.
        notifications = data.get("notifications") or []
    else:
        # A top-level JSON null would otherwise crash on len() below.
        notifications = data or []
    print(f"Processing {len(notifications)} mentions...")

    config = load_config()
    stats = process_notifications(notifications, config)

    print(
        f"\nSummary: {stats['new']} new, "
        f"{stats['already_tracked']} already tracked, "
        f"{stats['excluded_author']} excluded, "
        f"{stats['own_account']} own account, "
        f"{stats['too_short']} too short, "
        f"{stats['no_tweet_id']} no tweet_id"
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,299 @@
1
+ #!/usr/bin/env python3
2
+ """Scan our recent X replies for new public follow-ups and ingest them.
3
+
4
+ Companion to scan_twitter_mentions_browser.py. The mentions tab only surfaces
5
+ explicit @-mentions, so replies to our replies without a retagged handle are
6
+ invisible. This script compensates by revisiting each of our recent X replies
7
+ and scraping the page for depth-2+ comments that aren't yet in the DB.
8
+
9
+ Flow:
10
+ 1. Query `replies` for our X replies in last N days (default 14) where
11
+ `our_reply_url IS NOT NULL`. These are the threads we're subscribing to.
12
+ 2. Write those URLs to a temp file.
13
+ 3. Invoke `twitter_browser.py thread-followups <file>`, which scrapes each
14
+ URL and returns a `{results: [{thread_url, anchor_tweet_id, followups}]}`
15
+ JSON blob.
16
+ 4. For each followup not already in `replies` (by platform+their_comment_id),
17
+ insert a new `replies` row with:
18
+ - platform = 'x'
19
+ - parent_reply_id = id of the original reply (the anchor)
20
+ - post_id = anchor.post_id
21
+ - depth = anchor.depth + 1
22
+ - status = 'pending'
23
+ Tweets we posted ourselves are skipped (OUR_HANDLE check) and only counted
24
+ as "own account" in the summary; this script never writes a row for our
25
+ own tweets.
26
+
27
+ Usage:
28
+ python3 scripts/scan_twitter_thread_followups.py [--days N] [--max-urls N]
29
+
30
+ Migrated 2026-05-18: reads/writes now route through the s4l.ai HTTP API
31
+ (/api/v1/replies for both filter-list and insert) instead of psycopg2.
32
+ The (platform, their_comment_id) dedup runs server-side; the local
33
+ known_ids cache is now just for in-loop short-circuiting.
34
+ """
35
+
36
+ import argparse
37
+ import json
38
+ import os
39
+ import re
40
+ import subprocess
41
+ import sys
42
+ import tempfile
43
+ from datetime import datetime, timedelta, timezone
44
+
45
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
46
+ from http_api import api_get, api_post # noqa: E402
47
+ try:
48
+ from account_resolver import resolve as _resolve_account # noqa: E402
49
+ except Exception:
50
+ def _resolve_account(_platform): # type: ignore[unused-arg]
51
+ return None
52
+
53
# Filesystem locations: the checked-out repo and the JSON config inside it.
REPO_DIR = os.path.expanduser("~/social-autoposter")
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")

# CLI defaults for --days / --max-urls, and the page size requested when
# snapshotting already-known comment ids.
DEFAULT_DAYS = 14
DEFAULT_MAX_URLS = 40
REPLY_PAGE_LIMIT = 500

# The X handle every ingested row is attributed to.
OUR_HANDLE = _resolve_account("twitter")
if not OUR_HANDLE:
    # Deliberately no hardcoded fallback: running under a default handle would
    # silently attribute activity to the repo owner. Fail at import time so
    # the missing configuration is noticed.
    _missing_handle_msg = (
        "[scan_twitter_followups] no Twitter handle configured "
        "(accounts.twitter.handle / AUTOPOSTER_TWITTER_HANDLE); refusing to run "
        "to avoid wrong-account attribution. Run connect_x first.\n"
    )
    sys.stderr.write(_missing_handle_msg)
    sys.exit(1)
67
+
68
+
69
def load_config(path=None):
    """Load the autoposter JSON config, returning ``{}`` when it is absent.

    Args:
        path: Optional location of the config file. Defaults to
            ``CONFIG_PATH`` when omitted, which keeps existing zero-argument
            callers working unchanged.

    Returns:
        The parsed JSON document, or an empty dict if the file does not exist.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
        OSError: for failures other than a missing file (e.g. permissions).
    """
    config_path = CONFIG_PATH if path is None else path
    # Open directly instead of checking os.path.exists() first: the file can
    # disappear between the check and the open. JSON is UTF-8 by
    # specification, so do not rely on the locale's default encoding.
    try:
        with open(config_path, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        return {}
74
+
75
+
76
def fetch_our_recent_x_replies(days, max_urls):
    """List our recent X replies that have a permalink we can revisit.

    Queries ``/api/v1/replies`` with ``platform=x``, ``status=replied``, a
    ``since`` cutoff of ``days`` days before now (ISO-8601, UTC), ordered by
    ``replied_at`` and limited to ``max_urls`` rows. The route returns
    ``our_reply_url`` on every row but has no filter for it, so rows without
    one are dropped here; the result can therefore be shorter than
    ``max_urls``.

    Returns:
        A list of ``(reply_id, our_reply_url, post_id, depth)`` tuples, at most
        ``max_urls`` long. A missing or falsy ``depth`` is treated as 1.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    params = {
        "platform": "x",
        "status": "replied",
        "since": cutoff.isoformat(),
        "limit": max_urls,
        "order_by": "replied_at",
    }
    payload = api_get("/api/v1/replies", query=params)
    rows = (payload.get("data") or {}).get("replies") or []
    revisitable = [
        (row["id"], row["our_reply_url"], row.get("post_id"), int(row.get("depth") or 1))
        for row in rows
        if row.get("our_reply_url")
    ]
    return revisitable[:max_urls]
106
+
107
+
108
def existing_comment_ids():
    """Snapshot the ``their_comment_id`` values of one page of X replies.

    The server's UNIQUE (platform, their_comment_id) index remains the real
    dedup mechanism; this set only lets the caller skip POSTs it already
    knows would conflict and report an accurate "already tracked" count.
    Only a single page of ``REPLY_PAGE_LIMIT`` rows (ordered by ``id``) is
    requested, so the set is not guaranteed to be exhaustive.

    Returns:
        A set of the non-empty ``their_comment_id`` values on that page.
    """
    payload = api_get(
        "/api/v1/replies",
        query={"platform": "x", "limit": REPLY_PAGE_LIMIT, "order_by": "id"},
    )
    rows = (payload.get("data") or {}).get("replies") or []
    seen = set()
    for row in rows:
        comment_id = row.get("their_comment_id")
        if comment_id:
            seen.add(comment_id)
    return seen
123
+
124
+
125
def anchor_id_from_url(url):
    """Extract the tweet id that follows ``/status/`` in a tweet URL.

    Returns the id as a string of digits, or ``None`` when ``url`` is
    empty/``None`` or contains no ``/status/<digits>`` segment.
    """
    found = re.search(r"/status/(\d+)", url or "")
    if found is None:
        return None
    return found.group(1)
128
+
129
+
130
def run_browser_scrape(urls, scroll_count=3):
    """Run ``twitter_browser.py thread-followups`` over ``urls`` and parse its JSON.

    The URLs are written one per line to a temporary file whose path is
    passed to the scraper together with ``scroll_count``. The temp file is
    always removed afterwards.

    Args:
        urls: Iterable of thread URLs (strings) to visit.
        scroll_count: Number passed through to the scraper as its scroll
            argument.

    Returns:
        The scraper's parsed JSON output on success. With no URLs, returns
        ``{"results": [], "urls_visited": 0}`` without launching anything.
        On failure, returns ``{"results": [], "error": <code>}`` where the
        code is ``browser_timeout``, ``browser_failed`` (non-zero exit) or
        ``json_parse_failed``; details are printed to stderr.
    """
    if not urls:
        return {"results": [], "urls_visited": 0}

    # delete=False: the file must outlive this `with` so the child process
    # can read it; it is unlinked in the `finally` below.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
        urls_path = f.name
        f.writelines(u + "\n" for u in urls)
    try:
        try:
            proc = subprocess.run(
                ["python3", os.path.join(REPO_DIR, "scripts/twitter_browser.py"),
                 "thread-followups", urls_path, str(scroll_count)],
                capture_output=True, text=True, timeout=1800,
            )
        except subprocess.TimeoutExpired as exc:
            # A hung browser previously escaped as an unhandled traceback;
            # report it like every other scraper failure instead.
            print(f"ERROR: twitter_browser.py timed out after {exc.timeout}s",
                  file=sys.stderr)
            return {"results": [], "error": "browser_timeout"}
        if proc.returncode != 0:
            print(f"ERROR: twitter_browser.py exited {proc.returncode}", file=sys.stderr)
            print(proc.stderr[-2000:], file=sys.stderr)
            return {"results": [], "error": "browser_failed"}
        try:
            return json.loads(proc.stdout)
        except json.JSONDecodeError as e:
            print(f"ERROR: could not parse browser output as JSON: {e}", file=sys.stderr)
            print(proc.stdout[-2000:], file=sys.stderr)
            return {"results": [], "error": "json_parse_failed"}
    finally:
        # Best-effort cleanup; a missing temp file is not worth failing over.
        try:
            os.unlink(urls_path)
        except OSError:
            pass
159
+
160
+
161
def insert_followup(followup, parent_reply_id, post_id, parent_depth, root_author=None):
    """POST one scraped follow-up to ``/api/v1/replies`` as a pending reply.

    Args:
        followup: Scraped tweet dict; reads ``tweet_id``, ``handle``, ``text``,
            ``tweet_url`` and ``media``.
        parent_reply_id: Id of our reply that this follow-up responds to.
        post_id: Post id inherited from the parent reply.
        parent_depth: Depth of the parent reply; the new row is stored one
            level deeper (a falsy depth counts as 1).
        root_author: Handle of the thread's original poster, if known.

    Returns:
        True when the row was posted and the API did not report a
        ``duplicate_reply`` conflict. False when the follow-up lacks a tweet
        id or handle, was written by our own account, or already exists.
    """
    tweet_id = followup.get("tweet_id") or ""
    author = (followup.get("handle") or "").lstrip("@")
    if not (tweet_id and author):
        return False
    if author.lower() == OUR_HANDLE.lower():
        return False

    payload = {
        "post_id": post_id,
        "platform": "x",
        "their_comment_id": tweet_id,
        "their_author": author,
        "their_content": followup.get("text") or "",
        "their_comment_url": followup.get("tweet_url") or "",
        "depth": (parent_depth or 1) + 1,
        "status": "pending",
        "parent_reply_id": parent_reply_id,
        "our_account": OUR_HANDLE,
    }

    # Record the thread's original poster whenever it is known, even if the
    # OP is also the replier: that equality is how "the OP replied to us" is
    # detected downstream.
    op_handle = (root_author or "").lstrip("@")
    if op_handle:
        payload["thread_author_handle"] = op_handle

    # Media captured for the follow-up tweet. An empty list is meaningful
    # ("looked, found none") and is sent as-is; the field is only omitted
    # when the extractor produced no list at all.
    attachments = followup.get("media")
    if isinstance(attachments, list):
        payload["their_media"] = attachments

    response = api_post("/api/v1/replies", payload, ok_on_conflict=True)
    error_code = (response.get("error") or {}).get("code")
    return error_code != "duplicate_reply"
206
+
207
+
208
def main():
    """Revisit our recent X replies and ingest follow-ups that reply to us.

    Steps: fetch our recent replies, scrape each reply's page through
    ``run_browser_scrape``, then walk the scraped follow-ups and insert the
    ones that (a) are not the anchor tweet itself, (b) were not written by
    us, (c) are marked as replying to our handle and (d) are not already
    tracked. With ``--dry-run`` the candidates are printed instead of
    inserted.

    Returns:
        The number of follow-ups ingested (or that would be ingested in a
        dry run). Returns -1 when the scrape reported an error and yielded
        no results, so the entry point can exit non-zero.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument("--days", type=int, default=DEFAULT_DAYS,
                        help=f"Look back N days for our replies (default {DEFAULT_DAYS})")
    parser.add_argument("--max-urls", type=int, default=DEFAULT_MAX_URLS,
                        help=f"Max thread URLs to revisit per run (default {DEFAULT_MAX_URLS})")
    parser.add_argument("--scroll-count", type=int, default=3,
                        help="Scrolls per thread page (default 3)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print what would be inserted without writing")
    args = parser.parse_args()

    our_replies = fetch_our_recent_x_replies(args.days, args.max_urls)
    print(f"Revisiting {len(our_replies)} of our recent X replies (last {args.days}d)")
    if not our_replies:
        return 0

    url_to_meta = {url: (rid, pid, depth) for rid, url, pid, depth in our_replies}
    urls = list(url_to_meta)

    print(f"Invoking browser scraper for {len(urls)} URLs...")
    data = run_browser_scrape(urls, scroll_count=args.scroll_count)

    results = data.get("results") or []
    scrape_error = data.get("error")
    if scrape_error:
        # Previously the error key was ignored, so a failed scrape looked
        # like a clean run with zero follow-ups and exited 0. Surface it, and
        # fail the run when nothing at all could be scraped.
        print(f"ERROR: browser scrape reported: {scrape_error}", file=sys.stderr)
        if not results:
            return -1

    known_ids = existing_comment_ids()
    our_handle = OUR_HANDLE.lower()
    new_count = 0
    skip_own = 0
    skip_existing = 0
    skip_anchor = 0
    skip_not_replying_to_us = 0

    for r in results:
        thread_url = r.get("thread_url") or ""
        anchor_id = r.get("anchor_tweet_id") or anchor_id_from_url(thread_url)
        root_author = (r.get("root_author") or "").lstrip("@")
        meta = url_to_meta.get(thread_url)
        if not meta:
            # Result for a URL we did not ask about; nothing to attach it to.
            continue
        parent_reply_id, post_id, parent_depth = meta
        child_depth = (parent_depth or 1) + 1

        for fu in r.get("followups", []):
            tid = fu.get("tweet_id")
            handle = (fu.get("handle") or "").lstrip("@")
            if not tid:
                continue
            if tid == anchor_id:
                skip_anchor += 1
                continue
            if handle.lower() == our_handle:
                skip_own += 1
                continue
            # Only keep tweets that are actually replying to us. X permalink
            # pages inject "more from this author" / "you might like"
            # articles into the timeline; without this check those leak in as
            # fake follow-ups. The extractor records `replying_to` from the
            # "Replying to @handle" block above each tweet; if it is empty or
            # names someone else, the tweet is not a response to us.
            replying_to = (fu.get("replying_to") or "").lstrip("@").lower()
            if replying_to != our_handle:
                skip_not_replying_to_us += 1
                continue
            if tid in known_ids:
                skip_existing += 1
                continue
            snippet = (fu.get('text') or '')[:80]
            if args.dry_run:
                print(f" [DRY] @{handle} (tid={tid}) op=@{root_author or '?'} "
                      f"parent_reply={parent_reply_id} depth={child_depth}: {snippet}")
                new_count += 1
                known_ids.add(tid)
                continue
            inserted = insert_followup(fu, parent_reply_id, post_id, parent_depth,
                                       root_author=root_author)
            # Remember the id either way so a repeat within this run is
            # short-circuited locally.
            known_ids.add(tid)
            if inserted:
                new_count += 1
                print(f" NEW follow-up: @{handle} (tid={tid}) "
                      f"parent_reply={parent_reply_id} depth={child_depth}: {snippet}")
            else:
                # Typically a 409 duplicate (someone else inserted between our
                # local snapshot and this POST). Count it as already-tracked,
                # not new.
                skip_existing += 1

    print(f"\nSummary: {new_count} new follow-ups ingested, "
          f"{skip_existing} already tracked, {skip_own} own account, "
          f"{skip_anchor} anchor skips, {skip_not_replying_to_us} not replying to us")
    return new_count


if __name__ == "__main__":
    rc = main()
    sys.exit(0 if rc >= 0 else 1)