@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,252 @@
1
+ #!/usr/bin/env python3
2
+ """Scan Moltbook notifications for new replies to our content.
3
+
4
+ Uses /api/v1/notifications (inbox-style), mirroring the Reddit scanner.
5
+ Replaces the legacy per-post comment-polling scan, which broke when
6
+ /api/v1/posts/{uuid} stopped embedding the `comments` array and moved
7
+ them to /api/v1/posts/{uuid}/comments (~2026-03-18).
8
+
9
+ Handles notification types `comment_reply` and `mention`.
10
+ `dm_request` and `new_follower` are ignored (not engagement we reply to).
11
+
12
+ Inserts into the `replies` table as 'pending' (fresh) or 'skipped'
13
+ (backfill_old / too_short / deleted_or_spam). Matches posts by
14
+ `relatedPostId` against `posts.thread_url`. Dedupe key is
15
+ `relatedCommentId` via `reply_insert.already_tracked`.
16
+
17
+ Usage:
18
+ python3 scripts/scan_moltbook_replies.py
19
+
20
+ Requires MOLTBOOK_API_KEY in ~/social-autoposter/.env.
21
+ """
22
+
23
+ import json
24
+ import os
25
+ import sys
26
+ import time
27
+ from datetime import datetime, timezone
28
+
29
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
30
+ from http_api import api_get, load_env
31
+ from reply_insert import insert_reply as _insert_reply
32
+ from moltbook_tools import (
33
+ fetch_moltbook_json,
34
+ HttpNotFoundError,
35
+ MoltbookRateLimitedError,
36
+ )
37
+
38
# Operator configuration (accounts, exclusions) read by load_config().
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")

# --- Pagination -----------------------------------------------------------
# Page size requested from the notifications endpoint.
PAGE_LIMIT = 100
# Upper bound on pages fetched per run (PAGE_LIMIT * MAX_PAGES ~= 2000 items).
MAX_PAGES = 20
# Delay between consecutive page fetches, in seconds.
PAGE_PAUSE_SECONDS = 1.0
# Stop paginating after this many consecutive items that produced no new row.
CONSECUTIVE_KNOWN_STOP = 50

# --- Filtering ------------------------------------------------------------
# Notifications older than this many hours are stored as 'backfill_old'.
BACKFILL_HOURS = 48
# Notification types treated as engagement; every other type is ignored.
ENGAGE_TYPES = {
    "comment_reply",
    "mention",
}
# Comments with fewer words than this are stored as 'too_short'.
MIN_WORDS = 5
47
+
48
+
49
def load_config():
    """Return the parsed JSON at CONFIG_PATH, or an empty dict if the file is absent."""
    if not os.path.exists(CONFIG_PATH):
        return {}
    with open(CONFIG_PATH) as handle:
        return json.load(handle)
54
+
55
+
56
def word_count(text):
    """Return the number of whitespace-separated tokens in *text*.

    Falsy input (None or the empty string) counts as zero words.
    """
    if not text:
        return 0
    return len(text.split())
58
+
59
+
60
def parse_iso(ts):
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    Args:
        ts: Timestamp text such as "2026-03-18T12:00:00Z". A trailing "Z"
            is rewritten to "+00:00" so older `fromisoformat`
            implementations accept it.

    Returns:
        An aware `datetime`, or None when *ts* is empty, not a string, or
        not parseable.

    Timestamps without an explicit offset are assumed to be UTC. Leaving
    them naive would make the caller's `.timestamp()` interpret them in the
    host's local zone, shifting the backfill cutoff by the machine's UTC
    offset.
    """
    if not ts or not isinstance(ts, str):
        return None
    if ts.endswith("Z"):
        ts = ts[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(ts)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
69
+
70
+
71
class MoltbookNotificationScanner:
    """Scan the Moltbook notification inbox and record replies to our posts.

    Notifications whose type is in ENGAGE_TYPES are matched to a tracked
    post and passed to `reply_insert.insert_reply` with status 'pending' or
    'skipped'. Running totals are kept on the instance and reported by
    `finish()`.
    """

    def __init__(self, api_key, self_username, self_agent_id, excluded_authors):
        """Store credentials and filters and zero all counters.

        Args:
            api_key: Moltbook API key; `scan()` is a no-op when it is empty.
            self_username: Our own username; added (lower-cased) to the
                excluded-author set.
            self_agent_id: Our own agent id; comments whose `authorId`
                equals it (case-insensitively) are counted as self-replies.
            excluded_authors: Iterable of author names to ignore (may be None).
        """
        self.api_key = api_key
        self.self_username_lower = (self_username or "").lower()
        self.self_agent_id = (self_agent_id or "").lower()
        self.excluded = {a.lower() for a in (excluded_authors or [])}
        self.excluded.update({"[deleted]", self.self_username_lower})
        # reply_insert.insert_reply ignores the `db` arg (HTTP-only) but still
        # accepts it positionally; pass None to satisfy the signature.
        self.db = None
        # relatedPostId -> local post id, for posts already resolved during
        # this run. Only successful matches are remembered, so a failed or
        # empty lookup is retried on the next notification.
        self._post_id_cache = {}
        self.discovered = 0
        self.skipped_backfill = 0
        self.skipped_short = 0
        self.skipped_moderated = 0
        self.skipped_excluded = 0
        self.skipped_self = 0
        self.unmatched = 0
        self.total_seen = 0
        self.consecutive_known = 0

    def _post_id_for_moltbook(self, related_post_id):
        """Return the id of our tracked post whose thread_url contains the UUID.

        Returns None when *related_post_id* is falsy or no post matches.
        Matches are memoised per run, so several notifications on one post
        trigger a single HTTP lookup.
        """
        if not related_post_id:
            return None
        cached = self._post_id_cache.get(related_post_id)
        if cached is not None:
            return cached
        resp = api_get(
            "/api/v1/posts",
            query={
                "platform": "moltbook",
                "thread_url_contains": related_post_id,
                "order_by": "id",
                "order_dir": "desc",
                "limit": 1,
            },
        )
        rows = ((resp or {}).get("data") or {}).get("posts") or []
        post_id = rows[0].get("id") if rows else None
        if post_id is not None:
            self._post_id_cache[related_post_id] = post_id
        return post_id

    def _insert(self, post_id, comment_id, author, content, comment_url,
                post_uuid, status, skip_reason=None):
        """Record one reply and update the counters.

        Returns:
            True when `insert_reply` reported a result that bumped a counter
            ('pending', or 'skipped' with a recognised skip reason). False
            otherwise; that case is treated as "already known" and
            lengthens the `consecutive_known` streak.
        """
        result = _insert_reply(
            self.db, post_id, "moltbook", comment_id, author, content, comment_url,
            status=status, skip_reason=skip_reason,
            moltbook_post_uuid=post_uuid,
        )
        counted = False
        if result == "pending":
            self.discovered += 1
            counted = True
        elif result == "skipped":
            if skip_reason == "backfill_old":
                self.skipped_backfill += 1
                counted = True
            elif skip_reason == "moderated":
                self.skipped_moderated += 1
                counted = True
            elif skip_reason and skip_reason.startswith("too_short"):
                self.skipped_short += 1
                counted = True
        if counted:
            self.consecutive_known = 0
        else:
            self.consecutive_known += 1
        return counted

    def scan(self):
        """Page through the notifications endpoint and record engagement items.

        Pagination stops on: a rate-limit or 404 error, an empty response or
        page, `has_more` being falsy, a missing `next_cursor`, MAX_PAGES
        reached, or CONSECUTIVE_KNOWN_STOP consecutive already-known items.
        """
        if not self.api_key:
            print("MOLTBOOK_API_KEY not set, skipping Moltbook notification scan")
            return
        print("Scanning Moltbook notifications...")
        backfill_cutoff = datetime.now(timezone.utc).timestamp() - BACKFILL_HOURS * 3600
        cursor = None
        for page in range(1, MAX_PAGES + 1):
            url = f"https://www.moltbook.com/api/v1/notifications?limit={PAGE_LIMIT}"
            if cursor:
                # NOTE(review): the cursor is appended verbatim. If the API
                # ever returns cursors containing '&', '+' or '=', it needs
                # URL-encoding here -- confirm the cursor format.
                url += f"&cursor={cursor}"
            try:
                data = fetch_moltbook_json(url, api_key=self.api_key)
            except MoltbookRateLimitedError as e:
                print(f" Stopping scan: Moltbook rate-limited for {int(e.reset_seconds)}s")
                break
            except HttpNotFoundError:
                print(" Notifications endpoint returned 404; aborting scan")
                break
            if not data:
                print(" Empty response; aborting scan")
                break
            notifs = data.get("notifications") or []
            print(f" page {page}: {len(notifs)} notifications (has_more={data.get('has_more')})")
            if not notifs:
                break
            for n in notifs:
                self.total_seen += 1
                ntype = n.get("type")
                if ntype not in ENGAGE_TYPES:
                    continue
                comment_id = n.get("relatedCommentId")
                post_uuid = n.get("relatedPostId")
                if not comment_id or not post_uuid:
                    continue
                post_id = self._post_id_for_moltbook(post_uuid)
                if not post_id:
                    self.unmatched += 1
                    continue
                comment = n.get("comment") or {}
                author_id = (comment.get("authorId") or "").strip()
                if author_id and author_id.lower() == self.self_agent_id:
                    self.skipped_self += 1
                    continue
                author = author_id or "[unknown]"
                if author.lower() in self.excluded:
                    self.skipped_excluded += 1
                    continue
                content = comment.get("content") or ""
                comment_url = f"https://www.moltbook.com/post/{post_uuid}#{comment_id}"

                if comment.get("isDeleted") or comment.get("isSpam") or comment.get("isFlagged"):
                    self._insert(post_id, comment_id, author, content, comment_url,
                                 post_uuid=post_uuid,
                                 status="skipped", skip_reason="moderated")
                    continue

                created_at = parse_iso(n.get("createdAt") or comment.get("createdAt"))
                is_old = bool(created_at and created_at.timestamp() < backfill_cutoff)

                words = word_count(content)
                if words < MIN_WORDS:
                    self._insert(post_id, comment_id, author, content, comment_url,
                                 post_uuid=post_uuid,
                                 status="skipped",
                                 skip_reason=f"too_short ({words} words)")
                elif is_old:
                    self._insert(post_id, comment_id, author, content, comment_url,
                                 post_uuid=post_uuid,
                                 status="skipped", skip_reason="backfill_old")
                else:
                    newly_recorded = self._insert(
                        post_id, comment_id, author, content, comment_url,
                        post_uuid=post_uuid,
                        status="pending",
                    )
                    # Only announce rows that were actually inserted; a
                    # comment tracked on an earlier run is not "NEW".
                    if newly_recorded:
                        print(f" NEW: [{post_id}] author={author[:8]} {content[:80]}...")
                if self.consecutive_known >= CONSECUTIVE_KNOWN_STOP:
                    print(f" hit {self.consecutive_known} consecutive already-known items, stopping pagination")
                    return
            if not data.get("has_more"):
                break
            cursor = data.get("next_cursor")
            if not cursor:
                break
            if page < MAX_PAGES:
                time.sleep(PAGE_PAUSE_SECONDS)

    def finish(self):
        """Print a one-line summary and return the counters as a dict."""
        print(
            f"Notification scan complete: seen={self.total_seen} "
            f"new_pending={self.discovered} backfill_skipped={self.skipped_backfill} "
            f"too_short_skipped={self.skipped_short} moderated_skipped={self.skipped_moderated} "
            f"excluded_author={self.skipped_excluded} self_filtered={self.skipped_self} "
            f"unmatched_thread={self.unmatched}"
        )
        return {
            "discovered": self.discovered,
            "backfill_skipped": self.skipped_backfill,
            "too_short_skipped": self.skipped_short,
            "moderated_skipped": self.skipped_moderated,
            "excluded": self.skipped_excluded,
            "self_filtered": self.skipped_self,
            "unmatched": self.unmatched,
            "total_seen": self.total_seen,
        }
235
+
236
+
237
def main():
    """Run one notification scan and exit 0 if any new pending reply was found, else 1."""
    load_env()
    moltbook_key = os.environ.get("MOLTBOOK_API_KEY", "")
    settings = load_config()
    account = settings.get("accounts", {}).get("moltbook", {}) or {}
    scanner = MoltbookNotificationScanner(
        moltbook_key,
        account.get("username", ""),
        # Optional: when set, our own replies are filtered by agent id.
        account.get("agent_id", ""),
        settings.get("exclusions", {}).get("authors", []),
    )
    scanner.scan()
    summary = scanner.finish()
    if summary["discovered"] > 0:
        sys.exit(0)
    sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env python3
2
+ """Repo hygiene scanner: block PII, secrets, images, and absolute home paths.
3
+
4
+ This is a PUBLIC repo. The scanner is the single source of truth reused by:
5
+ - the shared pre-commit hook (scripts/git-hooks/pre-commit), mode --staged
6
+ - the gitleaks-adjacent CI job (.github/workflows/secret-scan.yml), mode --all
7
+ - on-demand audits: `python3 scripts/scan_pii.py --all`
8
+
9
+ What it flags in staged/tracked content:
10
+ 1. Secret-shaped literals (tokens, private keys, db URLs with passwords).
11
+ 2. Real client/operator PII from a gitignored denylist (pii_denylist.local.txt):
12
+ one term per line (email, name, handle, phone). Never commit that file.
13
+ 3. Tracked images / media (this repo stores none; see .gitignore).
14
+ 4. Absolute /Users/<name>/ home paths (leaks layout, breaks other operators).
15
+
16
+ Exit code 0 = clean, 1 = violations found (prints them). Override a specific
17
+ commit with `git commit --no-verify`, but fix the finding instead when you can.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import re
24
+ import subprocess
25
+ import sys
26
+ from pathlib import Path
27
+
28
# Repository root: this file lives in <repo>/scripts/.
REPO = Path(__file__).resolve().parent.parent
# Gitignored, operator-local list of PII terms (one per line).
DENYLIST_FILE = REPO / "pii_denylist.local.txt"

# Files/paths the scanner must not scan (it would flag its own rules/examples).
ALLOWLIST_SUFFIXES = (
    "scripts/scan_pii.py",
    "scripts/git-hooks/pre-commit",
    ".github/workflows/secret-scan.yml",
    ".gitignore",
    ".env.example",
    "config.example.json",
    "pii_denylist.local.txt",
)

# (regex, human-readable label) pairs; every pattern is searched on each line.
SECRET_PATTERNS = [
    (r"ghp_[A-Za-z0-9]{36}", "GitHub personal access token"),
    # gho_/ghu_/ghs_/ghr_ = OAuth, user-to-server, server-to-server and
    # refresh tokens. `p` is deliberately absent: ghp_ is covered by the
    # rule above, and listing it here reported every classic PAT twice.
    (r"gh[orsu]_[A-Za-z0-9]{36}", "GitHub token"),
    (r"github_pat_[A-Za-z0-9_]{50,}", "GitHub fine-grained PAT"),
    (r"npm_[A-Za-z0-9]{36}", "npm token"),
    (r"sk-(?:proj-)?[A-Za-z0-9]{20,}", "OpenAI-style secret key"),
    (r"xox[baprs]-[A-Za-z0-9-]{10,}", "Slack token"),
    (r"AKIA[0-9A-Z]{16}", "AWS access key id"),
    (r"AIza[0-9A-Za-z_\-]{35}", "Google API key"),
    # Also covers the DSA and PKCS#8 "ENCRYPTED PRIVATE KEY" PEM headers.
    (r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |PGP |ENCRYPTED )?PRIVATE KEY", "Private key block"),
    (r"(?:postgres(?:ql)?|mysql|mongodb)://[^\s:/'\"]+:[^\s@'\"]+@", "DB URL with inline password"),
    (r"(?i)(?:api[_-]?key|secret|passwd|password|auth[_-]?token)\s*[=:]\s*[\"'][^\"'\s]{16,}[\"']", "Hardcoded credential"),
]

# Absolute home path leak. Placeholder forms (/Users/<you>, /Users/USERNAME) pass.
HOME_PATH_RE = re.compile(r"/Users/(?!<|USER|USERNAME|you\b|me\b|name\b)[a-z0-9._-]{2,}", re.I)

# Extensions treated as image/media; matched case-insensitively on the path suffix.
IMAGE_EXT = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".heic", ".bmp", ".tiff"}
60
+
61
+
62
def _run(cmd: list[str]) -> str:
    """Run *cmd* with the repo root as working directory and return its stdout.

    Output is captured and decoded as text. The exit status and stderr are
    not inspected, so a failing command yields whatever it wrote to stdout
    (typically an empty string).
    """
    completed = subprocess.run(cmd, cwd=REPO, capture_output=True, text=True)
    return completed.stdout
64
+
65
+
66
def _staged_files() -> list[str]:
    """Return the paths of staged files that were added, copied, modified or renamed.

    `-z` makes git emit NUL-terminated, unquoted paths. Without it, names
    containing non-ASCII bytes, quotes or control characters come back
    C-quoted (wrapped in double quotes with escapes), which never match the
    image-suffix check or a later `git diff -- <path>` lookup.

    `R` is included in the filter because git's rename detection reports a
    renamed-and-edited file as a rename, and `ACM` alone would skip it.
    """
    out = _run([
        "git", "diff", "--cached", "--name-only", "--diff-filter=ACMR", "-z",
    ])
    return [name for name in out.split("\0") if name.strip()]
69
+
70
+
71
def _tracked_files() -> list[str]:
    """Return every path tracked by git in this repository.

    `git ls-files -z` emits NUL-terminated, unquoted names, so paths with
    non-ASCII or special characters are returned verbatim rather than in
    git's C-quoted form, which would not resolve on disk.
    """
    out = _run(["git", "ls-files", "-z"])
    return [name for name in out.split("\0") if name.strip()]
73
+
74
+
75
def _added_lines(path: str) -> list[tuple[int, str]]:
    """Return (lineno, text) for + lines in the staged diff of one file.

    Line numbers refer to the new (staged) version of the file and are
    taken from each hunk header's `+start` field.
    """
    diff = _run(["git", "diff", "--cached", "--unified=0", "--", path])
    added: list[tuple[int, str]] = []
    new_ln = 0
    in_hunk = False
    # Split on "\n" only: str.splitlines() also breaks on form feed, vertical
    # tab and Unicode separators, which would cut an added line in two and
    # leave its tail unscanned.
    for ln in diff.split("\n"):
        if ln.startswith("@@"):
            m = re.search(r"\+(\d+)", ln)
            new_ln = int(m.group(1)) if m else new_ln
            in_hunk = True
            continue
        if not in_hunk:
            # File header ("diff --git", "index", "---", "+++"). Skipping it
            # by position rather than by a "+++" prefix test means an added
            # line whose own text starts with "++" is still scanned.
            continue
        if ln.startswith("+"):
            added.append((new_ln, ln[1:]))
            new_ln += 1
        elif ln.startswith("-") or ln.startswith("\\"):
            # Removed lines and "\ No newline at end of file" markers do not
            # occupy a line in the new file.
            continue
        else:
            new_ln += 1
    return added
91
+
92
+
93
def _full_lines(path: str) -> list[tuple[int, str]]:
    """Return (lineno, text) for every line of the working-tree file at *path*.

    *path* is resolved relative to the repo root. Undecodable bytes are
    replaced rather than raising; an unreadable file yields an empty list.
    """
    target = REPO / path
    try:
        content = target.read_text(errors="replace")
    except (OSError, UnicodeDecodeError):
        return []
    return [(number, line) for number, line in enumerate(content.splitlines(), start=1)]
100
+
101
+
102
def _load_denylist() -> list[str]:
    """Return the PII terms listed in DENYLIST_FILE.

    Blank lines and lines starting with "#" (after stripping whitespace)
    are ignored. A missing file yields an empty list.
    """
    if not DENYLIST_FILE.exists():
        return []
    stripped = (raw.strip() for raw in DENYLIST_FILE.read_text().splitlines())
    return [term for term in stripped if term and not term.startswith("#")]
111
+
112
+
113
def _is_allowlisted(path: str) -> bool:
    """Return True when *path* equals, or ends with "/" plus, an ALLOWLIST_SUFFIXES entry."""
    for suffix in ALLOWLIST_SUFFIXES:
        if path == suffix or path.endswith("/" + suffix):
            return True
    return False
115
+
116
+
117
def scan(paths: list[str], staged: bool) -> tuple[list[str], list[str]]:
    """Return (hard, soft) findings.

    hard = secrets, denylist PII, images. Fail in every mode.
    soft = absolute home paths. Hard-block NEW ones (--staged) but only warn on
           the existing tree (--all), since there is pre-existing debt to burn
           down gradually rather than block CI on day one.
    """
    terms = _load_denylist()
    deny_re = (
        re.compile("|".join(re.escape(term) for term in terms), re.I)
        if terms
        else None
    )
    compiled_secrets = [(re.compile(pattern), label) for pattern, label in SECRET_PATTERNS]
    hard: list[str] = []
    soft: list[str] = []

    # Staged mode inspects only added diff lines; otherwise whole files.
    read_lines = _added_lines if staged else _full_lines
    # Home-path findings block in staged mode and merely warn otherwise.
    home_bucket = hard if staged else soft

    for path in paths:
        # Image / media files (path-based, no content read).
        if Path(path).suffix.lower() in IMAGE_EXT:
            hard.append(f"{path}: image/media file (this repo tracks no images)")
            continue
        if _is_allowlisted(path):
            continue

        for lineno, text in read_lines(path):
            hard.extend(
                f"{path}:{lineno}: possible {label}"
                for rx, label in compiled_secrets
                if rx.search(text)
            )
            pii_hit = deny_re.search(text) if deny_re else None
            if pii_hit:
                hard.append(f"{path}:{lineno}: PII denylist match ('{pii_hit.group(0)}')")
            home_hit = HOME_PATH_RE.search(text)
            if home_hit:
                home_bucket.append(
                    f"{path}:{lineno}: absolute home path ('{home_hit.group(0)}...')"
                )
    return hard, soft
155
+
156
+
157
def main() -> int:
    """Parse CLI flags, run the scan, print findings to stderr, and return the exit code.

    Returns 0 when there are no blocking findings and 1 otherwise. With
    neither flag given, the staged diff is scanned.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--staged", action="store_true", help="scan staged diff (pre-commit)")
    mode.add_argument("--all", action="store_true", help="scan whole tracked tree (CI/audit)")
    args = parser.parse_args()

    staged = args.staged or not args.all
    if staged:
        paths = _staged_files()
    else:
        paths = _tracked_files()
    hard, soft = scan(paths, staged=staged)

    if soft:
        print("Repo hygiene warnings (not blocking):\n", file=sys.stderr)
        for finding in soft:
            print(" warn: " + finding, file=sys.stderr)
        print("", file=sys.stderr)

    if hard:
        print("Repo hygiene scan found blocking issues:\n", file=sys.stderr)
        for finding in hard:
            print(" " + finding, file=sys.stderr)
        print(
            "\nFix the finding (move PII to a *.local.* gitignored file, drop the image, "
            "use an env/config lookup instead of an absolute path).\n"
            "If this is a genuine false positive, override this ONE commit with "
            "`git commit --no-verify`.",
            file=sys.stderr,
        )
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())