@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,168 @@
1
+ #!/usr/bin/env python3
2
+ """Scan GitHub issues for new replies to our comments.
3
+
4
+ Finds all issues we've commented on, checks for new comments from other users,
5
+ inserts into `replies` table as 'pending' or 'skipped'.
6
+
7
+ Works by scanning via thread_url + gh API - doesn't require our_url to be set.
8
+ """
9
+
10
+ import json
11
+ import os
12
+ import re
13
+ import subprocess
14
+ import sys
15
+ import time
16
+
17
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
18
+ from http_api import api_get, api_post
19
+
20
# Replies with fewer whitespace-separated words than this are recorded as
# 'skipped' (too_short) instead of 'pending'.
MIN_WORDS = 5
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")

# NOTE: posts/replies for GitHub live under platform='github' in the DB; the
# 'github_issues' value used here matches zero rows, so Phase A has long been a
# no-op. Preserved verbatim during the HTTP-only migration to avoid an
# unrequested volume/cost change (switching to 'github' would suddenly scan all
# ~6.8k GitHub posts). If you want to actually scan GitHub replies, flip
# SCAN_PLATFORM to 'github' deliberately.
SCAN_PLATFORM = "github_issues"
30
+
31
+
32
def load_config():
    """Return the parsed JSON config at CONFIG_PATH, or {} if the file is absent."""
    if not os.path.exists(CONFIG_PATH):
        return {}
    # JSON is UTF-8 by specification; don't depend on the locale's default
    # encoding when reading it.
    with open(CONFIG_PATH, encoding="utf-8") as f:
        return json.load(f)
37
+
38
+
39
def word_count(text):
    """Return the number of whitespace-separated words in text (0 if empty/None)."""
    if not text:
        return 0
    return len(text.split())
41
+
42
+
43
def main():
    """Scan tracked GitHub issue threads for replies and record them via the API.

    Steps, as implemented below:
      1. Fetch active posts for SCAN_PLATFORM and map every issue URL to the
         first post id returned for it.
      2. Drop issues whose "owner/repo/number" key contains an excluded repo
         pattern from config.exclusions.github_repos.
      3. For each remaining issue, list its comments with ``gh api`` and keep
         the ones written by other users after our first comment.
      4. POST each of those to /api/v1/replies as 'pending', or as 'skipped'
         with a skip_reason (excluded author / too short).

    Prints a progress line per newly discovered reply and a summary at the end.
    """
    config = load_config()
    # Fall back to the default login when the key is missing *or* null/empty.
    github_user = config.get("accounts", {}).get("github", {}).get("username") or "m13v"
    # GitHub logins are case-insensitive, so compare them lower-cased.
    our_login = github_user.lower()

    # Get all active GitHub posts we've commented on. The posts GET returns id +
    # thread_url together, so we capture the post_id map here and skip the
    # per-thread lookup the direct-SQL version used to do.
    resp = api_get("/api/v1/posts",
                   query={"platform": SCAN_PLATFORM, "status": "active", "limit": 500})
    rows = ((resp or {}).get("data") or {}).get("posts") or []

    issues = {}
    post_id_by_url = {}
    for row in rows:
        url = row.get("thread_url")
        if not url:
            continue
        # First post per thread_url wins (mirrors the old "use the first one").
        post_id_by_url.setdefault(url, row.get("id"))
        match = re.match(r"https://github\.com/([^/]+/[^/]+)/issues/(\d+)", url)
        if match:
            repo = match.group(1)
            issue_num = match.group(2)
            issues[f"{repo}/{issue_num}"] = url

    # Load exclusions
    exclusions = config.get("exclusions", {})
    excluded_authors = {a.lower() for a in exclusions.get("authors", [])}
    excluded_repos = {r.lower() for r in exclusions.get("github_repos", [])}

    # Filter out issues from excluded repos (substring match on the issue key).
    issues = {k: v for k, v in issues.items()
              if not any(repo_pat in k.lower() for repo_pat in excluded_repos)}

    print(f"Scanning {len(issues)} GitHub issues for replies...")

    discovered = 0
    skipped = 0
    errors = 0

    # The jq filter contains no interpolation, so it is a plain string rather
    # than an f-string with escaped braces.
    jq_filter = ('[.[] | {id: .id, user: .user.login, body: .body, '
                 'url: .html_url, created: .created_at}]')

    for issue_key, thread_url in issues.items():
        repo, issue_num = issue_key.rsplit("/", 1)

        # post_id captured alongside thread_url in the posts GET above.
        post_id = post_id_by_url.get(thread_url)
        if not post_id:
            continue

        # Fetch all comments on the issue.
        # NOTE(review): no --paginate / per_page is passed, so only the first
        # page of comments returned by the API is examined - confirm that is
        # acceptable for long threads.
        try:
            result = subprocess.run(
                ["gh", "api", f"repos/{repo}/issues/{issue_num}/comments",
                 "--jq", jq_filter],
                capture_output=True, text=True, timeout=15
            )
            if result.returncode != 0:
                # Surface why gh failed instead of only bumping the counter.
                detail = (result.stderr or "").strip()[:200]
                print(f" ERROR scanning {issue_key}: gh api exited {result.returncode}: {detail}")
                errors += 1
                continue
            comments = json.loads(result.stdout) if result.stdout.strip() else []
        except Exception as e:
            print(f" ERROR scanning {issue_key}: {e}")
            errors += 1
            continue

        # Split into our comments and everyone else's. `user` is null in the
        # jq output when a comment has no user object, hence the `or ""`.
        our_comments = []
        other_comments = []
        for c in comments:
            if (c.get("user") or "").lower() == our_login:
                our_comments.append(c)
            else:
                other_comments.append(c)

        if not our_comments:
            continue

        # Get the timestamp of our first comment
        our_first_ts = min(c["created"] for c in our_comments)

        # Only look at comments after our first comment
        replies_to_us = [c for c in other_comments if c["created"] > our_first_ts]

        for comment in replies_to_us:
            # `or ""` guards against keys that are present but null.
            author = comment.get("user") or ""
            body = comment.get("body") or ""
            comment_id = str(comment.get("id", ""))
            comment_url = comment.get("url") or ""

            # Determine status + skip_reason up front; the (platform,
            # their_comment_id) UNIQUE index on the API handles "already
            # tracked" (returns 409), so the old COUNT pre-check is gone.
            if author.lower() in excluded_authors:
                status, skip_reason = "skipped", "excluded_author"
            elif word_count(body) < MIN_WORDS:
                status, skip_reason = "skipped", f"too_short ({word_count(body)} words)"
            else:
                status, skip_reason = "pending", None

            payload = {
                "post_id": post_id,
                "platform": SCAN_PLATFORM,
                "their_comment_id": comment_id,
                "their_author": author,
                "their_content": body,
                "their_comment_url": comment_url,
                "depth": 1,
                "status": status,
            }
            if skip_reason:
                payload["skip_reason"] = skip_reason

            resp = api_post("/api/v1/replies", payload, ok_on_conflict=True)
            if not (resp or {}).get("ok"):
                # 409 duplicate_reply: already tracked from a prior run. Skip.
                continue
            reply = ((resp or {}).get("data") or {}).get("reply")
            if reply is None:
                # Blocklist / velocity gate dropped this fresh pending row.
                continue
            if status == "skipped":
                skipped += 1
            else:
                discovered += 1
                print(f" NEW: @{author} on {issue_key}: {body[:80]}...")

        time.sleep(1)  # Light rate limiting

    print(f"\nGitHub scan complete: {discovered} new pending, {skipped} skipped, {errors} errors")
165
+
166
+
167
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/env python3
2
+ """Scan Instagram Graph API for new comments on our posts.
3
+
4
+ For each enabled Instagram account in config.json (matt_diak, matthewheartful,
5
+ omidotme), this:
6
+
7
+ 1. Fetches /api/v1/posts?platform=instagram&our_account=<username> to build
8
+ a {shortcode: post_id} map of our DB-tracked IG posts.
9
+ 2. Lists /me/media for the account (reuses the same Graph API call shape
10
+ update_instagram_stats.py uses).
11
+ 3. For each media item present in our DB, calls /{media-id}/comments with
12
+ the replies sub-resource expanded.
13
+ 4. Inserts each comment (and its nested replies) into the `replies` table
14
+ via reply_insert.insert_reply(). Server-side UNIQUE (platform,
15
+ their_comment_id) handles dedup; this script never SELECTs.
16
+
17
+ Filters (mirrors scan_reddit_replies / scan_github_replies behavior):
18
+ - Skip comments whose author is in config.exclusions.authors
19
+ - Skip our own usernames (matt_diak / matthewheartful / omidotme) so we
20
+ don't try to reply to ourselves
21
+ - Skip backfill-old comments (older than BACKFILL_HOURS) with
22
+ status='skipped' / skip_reason='backfill_old'
23
+ - Skip too-short comments (< MIN_WORDS) with skip_reason='too_short'
24
+
25
+ This is discovery-only. Posting replies back to Instagram lives in a separate
26
+ engage script (Phase 2, not built yet); for now new rows surface in the
27
+ dashboard replies feed as platform='instagram', status='pending'.
28
+
29
+ Usage:
30
+ python3 scripts/scan_instagram_comments.py [--quiet] [--limit N]
31
+ [--account NAME]
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import argparse
37
+ import json
38
+ import os
39
+ import sys
40
+ import time
41
+ import urllib.error
42
+ import urllib.parse
43
+ import urllib.request
44
+ from datetime import datetime, timezone
45
+ from pathlib import Path
46
+
47
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
48
+ from http_api import api_get
49
+ from reply_insert import insert_reply as _insert_reply
50
+
51
+
52
# KEY=VALUE file holding the Instagram Graph API ids/tokens (see load_ig_env).
IG_ENV_PATH = Path.home() / "instagram-graph-api" / ".env"
GRAPH = "https://graph.instagram.com/v22.0"
# config.json one directory above this script's directory.
SA_CONFIG = Path(__file__).resolve().parent.parent / "config.json"

# Discovery filters
BACKFILL_HOURS = 48
MIN_WORDS = 5
# Per-Graph-API-call sleep so we stay polite under the 60/hr, 4800/day caps.
# 3 accounts * ~10 media * (1 list + 1 comments call) = ~60 calls/cycle;
# at 0.2s sleep that's ~12s per cycle, well inside 30-minute scheduling.
# NOTE(review): ~60 calls per cycle on a 30-minute schedule does not obviously
# fit under a 60/hr cap - confirm which limits actually apply here.
GRAPH_SLEEP_SECS = 0.2
63
+
64
+
65
+ # ── env / config ──────────────────────────────────────────────────────────────
66
+
67
def load_ig_env() -> dict:
    """Parse the KEY=VALUE file at IG_ENV_PATH into a dict of strings.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. Only
    the first ``=`` splits a line, so values may themselves contain ``=``. A
    leading ``export `` on the key and one pair of matching surrounding quotes
    on the value are stripped, so shell-style entries such as
    ``export TOKEN="abc"`` yield the bare value. Returns {} if the file does
    not exist.
    """
    if not IG_ENV_PATH.exists():
        return {}
    env = {}
    for raw in IG_ENV_PATH.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()
        value = value.strip()
        # Drop one pair of matching quotes; a quoted token would otherwise be
        # passed along with the quote characters included.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        env[key] = value
    return env
78
+
79
+
80
def load_config() -> dict:
    """Return the parsed JSON at SA_CONFIG, or {} if the file does not exist.

    Malformed JSON is not caught here; the decode error propagates.
    """
    try:
        # JSON is UTF-8 by specification; don't rely on the locale default.
        return json.loads(SA_CONFIG.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
85
+
86
+
87
def resolve_account_creds(account_name: str, ig_env: dict, accounts_cfg: list):
    """Return (ig_user_id, long_token) or (None, None). Matches the lookup
    pattern in scripts/update_instagram_stats.py.

    The account entry is matched case-insensitively on ``username``. Its
    ``ig_user_id_env`` / ``ig_long_token_env`` fields name the keys to read
    from ``ig_env`` (defaulting to IG_USER_ID / IG_LONG_TOKEN). If that does
    not yield both values, the default IG_USER_ID / IG_LONG_TOKEN keys are
    tried. A pair is only returned when both values are present.
    """
    wanted = (account_name or "").lower()
    match = next(
        # `or ""` keeps an entry with a null username from crashing .lower().
        (a for a in (accounts_cfg or []) if (a.get("username") or "").lower() == wanted),
        None,
    )
    if match:
        uid = ig_env.get(match.get("ig_user_id_env", "IG_USER_ID"))
        tok = ig_env.get(match.get("ig_long_token_env", "IG_LONG_TOKEN"))
        if uid and tok:
            return uid, tok
    uid = ig_env.get("IG_USER_ID")
    tok = ig_env.get("IG_LONG_TOKEN")
    if uid and tok:
        return uid, tok
    # Never hand back half a credential pair; the documented failure value is
    # (None, None).
    return None, None
102
+
103
+
104
+ # ── Graph API helpers ─────────────────────────────────────────────────────────
105
+
106
def graph_get(path: str, token: str, **params):
    """GET ``{GRAPH}/{path}`` with ``params`` plus the access token; return parsed JSON.

    Args:
        path: Endpoint path relative to GRAPH; a leading ``/`` is tolerated.
        token: Access token, sent as the ``access_token`` query parameter
            (overrides any ``access_token`` passed in ``params``).
        **params: Additional query parameters, URL-encoded as given.

    Raises:
        urllib.error.HTTPError / URLError from ``urlopen`` on request failure.
    """
    query = urllib.parse.urlencode({**params, "access_token": token})
    # lstrip avoids a double slash if a caller passes "/me/media"-style paths.
    url = f"{GRAPH}/{path.lstrip('/')}?{query}"
    with urllib.request.urlopen(url, timeout=20) as r:
        return json.loads(r.read())
111
+
112
+
113
+ def shortcode_from_url(url: str | None) -> str | None:
114
+ """Extract shortcode from an IG permalink.
115
+
116
+ https://www.instagram.com/reel/DYkkj8RDo9P/ -> DYkkj8RDo9P
117
+ """
118
+ import re
119
+ m = re.search(r"/(?:reel|p|tv)/([A-Za-z0-9_-]+)", url or "")
120
+ return m.group(1) if m else None
121
+
122
+
123
def fetch_media_list(ig_user_id: str, token: str, max_pages: int = 5) -> list[dict]:
    """Page through /{ig_user_id}/media. Returns the raw items list with permalink + id.

    Follows ``paging.next`` links for at most ``max_pages`` pages (100 items
    requested per page), sleeping GRAPH_SLEEP_SECS between page requests.

    Raises:
        urllib.error.HTTPError / URLError from ``urlopen``; not caught here.
    """
    # Build the query with urlencode (as graph_get does) so the token and
    # field list are escaped rather than pasted into the URL verbatim.
    query = urllib.parse.urlencode({
        "fields": "id,media_type,media_product_type,permalink,timestamp",
        "limit": 100,
        "access_token": token,
    })
    url = f"{GRAPH}/{ig_user_id}/media?{query}"
    items = []
    pages = 0
    while url and pages < max_pages:
        with urllib.request.urlopen(url, timeout=20) as r:
            data = json.loads(r.read())
        items.extend(data.get("data") or [])
        url = (data.get("paging") or {}).get("next")
        pages += 1
        # Only pause when another request will actually follow.
        if url and pages < max_pages:
            time.sleep(GRAPH_SLEEP_SECS)
    return items
141
+
142
+
143
def fetch_comments(media_id: str, token: str) -> list[dict]:
    """Return top-level comments for a media item, each with a nested
    `replies.data[]` list (Graph API caps the sub-list at 25 by default; that
    matches typical traffic on our posts).

    Only a single request (limit=50) is made; further pages are not followed.

    Raises:
        GraphApiError: on an HTTP error response, carrying the status code and
            the first 200 characters of the response body.
    """
    fields = (
        "id,username,text,timestamp,"
        "replies{id,username,text,timestamp}"
    )
    try:
        data = graph_get(f"{media_id}/comments", token, fields=fields, limit=50)
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        # Chain explicitly so the original HTTPError stays attached as the cause.
        raise GraphApiError(f"HTTP {e.code} on /{media_id}/comments: {body}") from e
    return data.get("data") or []
157
+
158
+
159
class GraphApiError(Exception):
    """Raised when a Graph API request returns an HTTP error response."""
161
+
162
+
163
+ # ── posts lookup ──────────────────────────────────────────────────────────────
164
+
165
def fetch_posts_map(account_username: str) -> dict[str, int]:
    """Build {shortcode: post_id} for posts.platform='instagram' AND
    posts.our_account=account_username. Uses the same /api/v1/posts endpoint
    scan_reddit_replies.py uses for its post-id lookup.

    The account filter is applied client-side (case-insensitively) to the up
    to 500 instagram posts the endpoint returns. Rows without a recognisable
    shortcode in ``our_url`` or without a numeric ``id`` are skipped.
    """
    wanted = account_username.lower()
    resp = api_get(
        "/api/v1/posts",
        query={"platform": "instagram", "limit": 500},
    )
    posts = ((resp or {}).get("data") or {}).get("posts") or []
    out: dict[str, int] = {}
    for p in posts:
        if (p.get("our_account") or "").lower() != wanted:
            continue
        code = shortcode_from_url(p.get("our_url"))
        if not code:
            continue
        try:
            out[code] = int(p.get("id"))
        except (TypeError, ValueError):
            # A row with a missing/non-numeric id can't be linked to a reply;
            # skip it rather than abort the whole account scan.
            continue
    return out
182
+
183
+
184
+ # ── parse / classify ──────────────────────────────────────────────────────────
185
+
186
+ def parse_ts(ts: str | None) -> float:
187
+ """Parse an IG ISO-8601 timestamp to a unix timestamp. Returns 0 on
188
+ failure (which counts as "old" for backfill purposes)."""
189
+ if not ts:
190
+ return 0.0
191
+ try:
192
+ # Instagram returns +0000 (no colon), strip and parse as UTC.
193
+ s = ts.replace("+0000", "+00:00")
194
+ return datetime.fromisoformat(s).timestamp()
195
+ except Exception:
196
+ return 0.0
197
+
198
+
199
+ def word_count(text: str | None) -> int:
200
+ return len((text or "").split())
201
+
202
+
203
def build_comment_url(shortcode: str, comment_id: str) -> str:
    """Return the instagram.com URL string for a comment on the post with this shortcode."""
    return f"https://www.instagram.com/p/{shortcode}/c/{comment_id}/"
205
+
206
+
207
+ # ── main scan loop ────────────────────────────────────────────────────────────
208
+
209
+ class IgCommentScanner:
210
+ def __init__(
211
+ self,
212
+ account_username: str,
213
+ ig_user_id: str,
214
+ token: str,
215
+ posts_map: dict[str, int],
216
+ excluded_authors: set[str],
217
+ quiet: bool = False,
218
+ media_limit: int | None = None,
219
+ ):
220
+ self.account = account_username
221
+ self.ig_user_id = ig_user_id
222
+ self.token = token
223
+ self.posts_map = posts_map
224
+ self.excluded = excluded_authors
225
+ self.quiet = quiet
226
+ self.media_limit = media_limit
227
+
228
+ self.discovered = 0
229
+ self.backfill_skipped = 0
230
+ self.too_short_skipped = 0
231
+ self.excluded_skipped = 0
232
+ self.already_tracked = 0
233
+ self.media_checked = 0
234
+ self.media_no_post = 0
235
+ self.comments_seen = 0
236
+
237
+ def log(self, msg: str):
238
+ if not self.quiet:
239
+ print(msg)
240
+
241
def _insert(
    self,
    post_id: int,
    comment_id: str,
    author: str,
    content: str,
    comment_url: str,
    depth: int,
    status: str,
    skip_reason: str | None = None,
):
    """Insert one comment as a platform='instagram' reply row and update counters.

    The value returned by ``insert_reply`` decides which counter moves:
    ``None`` counts as already tracked, ``"pending"`` as discovered, and
    ``"skipped"`` is bucketed by ``skip_reason`` (backfill_old / too_short* /
    excluded_author). Any other combination leaves the counters unchanged.
    """
    # NOTE(review): the first positional argument is passed as None and
    # parent_reply_id is always None - confirm against reply_insert.insert_reply.
    result = _insert_reply(
        None, post_id, "instagram", comment_id, author, content, comment_url,
        parent_reply_id=None, depth=depth, status=status, skip_reason=skip_reason,
    )
    if result is None:
        self.already_tracked += 1
        return
    if result == "pending":
        self.discovered += 1
    elif result == "skipped":
        if skip_reason == "backfill_old":
            self.backfill_skipped += 1
        elif skip_reason and skip_reason.startswith("too_short"):
            self.too_short_skipped += 1
        elif skip_reason == "excluded_author":
            self.excluded_skipped += 1
268
+
269
+ def _classify_and_insert(
270
+ self,
271
+ post_id: int,
272
+ shortcode: str,
273
+ comment: dict,
274
+ backfill_cutoff: float,
275
+ depth: int,
276
+ ):
277
+ comment_id = str(comment.get("id") or "")
278
+ if not comment_id:
279
+ return
280
+ self.comments_seen += 1
281
+ author = comment.get("username") or ""
282
+ content = comment.get("text") or ""
283
+ comment_url = build_comment_url(shortcode, comment_id)
284
+ created = parse_ts(comment.get("timestamp"))
285
+
286
+ if author.lower() in self.excluded:
287
+ self._insert(
288
+ post_id, comment_id, author, content, comment_url, depth,
289
+ status="skipped", skip_reason="excluded_author",
290
+ )
291
+ return
292
+
293
+ if created and created < backfill_cutoff:
294
+ self._insert(
295
+ post_id, comment_id, author, content, comment_url, depth,
296
+ status="skipped", skip_reason="backfill_old",
297
+ )
298
+ return
299
+
300
+ wc = word_count(content)
301
+ if wc < MIN_WORDS:
302
+ self._insert(
303
+ post_id, comment_id, author, content, comment_url, depth,
304
+ status="skipped", skip_reason=f"too_short ({wc} words)",
305
+ )
306
+ return
307
+
308
+ self._insert(
309
+ post_id, comment_id, author, content, comment_url, depth,
310
+ status="pending", skip_reason=None,
311
+ )
312
+
313
+ def scan(self):
314
+ self.log(f"[scan-ig-comments] account={self.account} posts_in_db={len(self.posts_map)}")
315
+ if not self.posts_map:
316
+ self.log(f"[scan-ig-comments] no instagram posts in DB for account={self.account}; nothing to scan")
317
+ return
318
+
319
+ try:
320
+ media_items = fetch_media_list(self.ig_user_id, self.token)
321
+ except urllib.error.HTTPError as e:
322
+ body = e.read().decode(errors="replace")[:200]
323
+ self.log(f"[scan-ig-comments] /me/media failed for {self.account}: HTTP {e.code} {body}")
324
+ return
325
+ except Exception as e:
326
+ self.log(f"[scan-ig-comments] /me/media failed for {self.account}: {e}")
327
+ return
328
+
329
+ self.log(f"[scan-ig-comments] /me/media returned {len(media_items)} items")
330
+ backfill_cutoff = time.time() - BACKFILL_HOURS * 3600
331
+
332
+ checked = 0
333
+ for item in media_items:
334
+ if self.media_limit and checked >= self.media_limit:
335
+ break
336
+ permalink = item.get("permalink")
337
+ shortcode = shortcode_from_url(permalink)
338
+ if not shortcode:
339
+ continue
340
+ post_id = self.posts_map.get(shortcode)
341
+ if not post_id:
342
+ self.media_no_post += 1
343
+ continue
344
+
345
+ media_id = item.get("id")
346
+ try:
347
+ comments = fetch_comments(media_id, self.token)
348
+ except GraphApiError as e:
349
+ self.log(f"[scan-ig-comments] media={media_id} shortcode={shortcode} comments fetch failed: {e}")
350
+ continue
351
+
352
+ self.media_checked += 1
353
+ checked += 1
354
+ self.log(
355
+ f"[scan-ig-comments] media={media_id} shortcode={shortcode} "
356
+ f"top_level_comments={len(comments)}"
357
+ )
358
+
359
+ for c in comments:
360
+ self._classify_and_insert(post_id, shortcode, c, backfill_cutoff, depth=1)
361
+ # Nested replies (replies to top-level comments). Author may
362
+ # be us (we already replied) or someone else (we got a reply
363
+ # to OUR reply). The excluded-author filter inside
364
+ # _classify_and_insert handles the first case.
365
+ replies = ((c.get("replies") or {}).get("data") or [])
366
+ for r in replies:
367
+ self._classify_and_insert(post_id, shortcode, r, backfill_cutoff, depth=2)
368
+
369
+ time.sleep(GRAPH_SLEEP_SECS)
370
+
371
+ def summary(self) -> dict:
372
+ return {
373
+ "account": self.account,
374
+ "media_checked": self.media_checked,
375
+ "media_no_post_in_db": self.media_no_post,
376
+ "comments_seen": self.comments_seen,
377
+ "discovered": self.discovered,
378
+ "backfill_skipped": self.backfill_skipped,
379
+ "too_short_skipped": self.too_short_skipped,
380
+ "excluded_skipped": self.excluded_skipped,
381
+ "already_tracked": self.already_tracked,
382
+ }
383
+
384
+
385
def main():
    """Command-line entry point: scan Instagram comments for configured accounts.

    Flags:
        --quiet    Suppress scanner progress lines and the per-account
                   "done" line. Warnings about missing credentials or failed
                   post lookups, and the final SUMMARY line, are still printed.
        --limit    Cap media items inspected per account (debug).
        --account  Scan only this account (default: all enabled).

    Always finishes by printing a single ``SUMMARY:`` line with the aggregated
    counters, including when there is nothing to scan.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--quiet", action="store_true")
    parser.add_argument("--limit", type=int, default=None,
                        help="Cap media items inspected per account (debug)")
    parser.add_argument("--account", default=None,
                        help="Scan only this account (default: all enabled)")
    args = parser.parse_args()

    ig_env = load_ig_env()
    cfg = load_config()
    accounts_cfg = ((cfg.get("instagram") or {}).get("accounts") or [])
    exclusions = cfg.get("exclusions") or {}
    base_excluded = {a.lower() for a in (exclusions.get("authors") or [])}
    # Always exclude our own usernames so we don't reply to ourselves.
    own_usernames = {a.get("username", "").lower() for a in accounts_cfg if a.get("username")}
    # Identical for every account, so build it once rather than per iteration.
    excluded_authors = base_excluded | own_usernames

    if args.account:
        wanted = args.account.lower()
        # `or ""` rather than a .get() default: the default only applies to a
        # missing key, so an explicit `"username": null` entry would otherwise
        # crash on None.lower().
        accounts_to_scan = [a for a in accounts_cfg
                            if (a.get("username") or "").lower() == wanted]
    else:
        accounts_to_scan = [a for a in accounts_cfg if a.get("enabled", True)]

    if not accounts_to_scan:
        print("[scan-ig-comments] no instagram accounts to scan; exiting")
        print("SUMMARY:DISCOVERED=0 SKIPPED=0 CHECKED=0 ALREADY=0 ACCOUNTS=0")
        return

    # Counters copied from each scanner summary; "accounts" is tracked separately.
    summed_keys = (
        "discovered",
        "backfill_skipped",
        "too_short_skipped",
        "excluded_skipped",
        "already_tracked",
        "media_checked",
        "comments_seen",
    )
    totals = dict.fromkeys(summed_keys, 0)
    totals["accounts"] = 0

    for account_cfg in accounts_to_scan:
        username = account_cfg.get("username", "")
        if not username:
            continue
        uid, tok = resolve_account_creds(username, ig_env, accounts_cfg)
        if not uid or not tok:
            print(f"[scan-ig-comments] missing creds for account={username}; skipping")
            continue

        try:
            posts_map = fetch_posts_map(username)
        except Exception as e:
            # Best effort: one account's failed lookup must not abort the rest.
            print(f"[scan-ig-comments] posts lookup failed for {username}: {e}")
            continue

        scanner = IgCommentScanner(
            username, uid, tok, posts_map, excluded_authors,
            quiet=args.quiet, media_limit=args.limit,
        )
        scanner.scan()
        s = scanner.summary()
        if not args.quiet:
            print(
                f"[scan-ig-comments] account={username} done: "
                f"media_checked={s['media_checked']} comments_seen={s['comments_seen']} "
                f"discovered={s['discovered']} "
                f"backfill_skipped={s['backfill_skipped']} "
                f"too_short_skipped={s['too_short_skipped']} "
                f"excluded_skipped={s['excluded_skipped']} "
                f"already_tracked={s['already_tracked']}"
            )

        for key in summed_keys:
            totals[key] += s[key]
        totals["accounts"] += 1

    skipped_total = (
        totals["backfill_skipped"]
        + totals["too_short_skipped"]
        + totals["excluded_skipped"]
    )

    print(
        f"SUMMARY:DISCOVERED={totals['discovered']} SKIPPED={skipped_total} "
        f"CHECKED={totals['media_checked']} ALREADY={totals['already_tracked']} "
        f"ACCOUNTS={totals['accounts']}"
    )
478
+
479
+
480
# Run the scan only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()