@m13v/s4l 1.6.197-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1336 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +513 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,228 @@
1
+ #!/usr/bin/env python3
2
+ """LinkedIn URL helpers: ID extraction, canonicalization, dedup checks.
3
+
4
+ LinkedIn surfaces the same post under multiple URL shapes:
5
+ /feed/update/urn:li:activity:<19-digit-activity-id>/[?commentUrn=...]
6
+ /posts/<author-slug>_<keywords>-activity-<19-digit-id>-<5-char-suffix>
7
+ /posts/<author-slug>_<keywords>-share-<19-digit-id>-<5-char-suffix>
8
+ /posts/<author-slug>_<keywords>-ugcPost-<19-digit-id>-<5-char-suffix>
9
+
10
+ The activity URN, share URN, and ugcPost URN for the same logical post are
11
+ DIFFERENT numbers, so canonicalizing to one form by string transform is not
12
+ possible. The pragmatic fix: extract every 16-19 digit ID from a URL and
13
+ treat the SET of IDs as the post identity. Two URLs collide if any ID
14
+ overlaps. (Across our DB this matches because the comment-permalink
15
+ captured after posting always carries the activity URN, so day-2 logging
16
+ under /posts/...-share-<X>-... still has our_url=/feed/update/...activity:<Y>
17
+ where Y matches day-1's stored thread_url ID.)
18
+
19
+ CLI:
20
+ python3 scripts/linkedin_url.py --extract URL
21
+ python3 scripts/linkedin_url.py --canonicalize URL
22
+ python3 scripts/linkedin_url.py --check-engaged URL
23
+ Exits 0 if the URL has any ID overlap with an existing
24
+ platform='linkedin' row. Prints JSON with {engaged, ids, match}.
25
+ python3 scripts/linkedin_url.py --check-self-author URL_OR_SLUG
26
+ Exits 0 if the author profile URL/slug matches one of our own
27
+ LinkedIn accounts (we should never comment on our own posts).
28
+ Exits 1 otherwise. Prints JSON with {input, slug, self}.
29
+ """
30
+
31
+ import argparse
32
+ import json
33
+ import os
34
+ import re
35
+ import sys
36
+ import urllib.parse
37
+
38
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
39
+ from http_api import api_get
40
+
41
# Any run of 16-19 digits delimited by word boundaries; group 1 is the ID.
# The \b anchors keep digit runs embedded in a longer word or number from
# matching.
ID_RE = re.compile(r"\b(\d{16,19})\b")
# An explicit activity URN, matched case-insensitively; group 1 is the
# 16-19 digits that follow the "urn:li:activity:" prefix.
ACTIVITY_URN_RE = re.compile(r"urn:li:activity:(\d{16,19})", re.IGNORECASE)

# LinkedIn public profile slugs we own. Author URL match against this set
# means "this is our own post; skip". Add any future account here.
SELF_LINKEDIN_SLUGS = {"m13v"}
47
+
48
+
49
def extract_slug(author_url_or_slug):
    """Normalize a LinkedIn author identifier to its bare profile slug.

    The input may be a full profile URL, a site-relative path, or the slug
    itself, for example:
        'https://www.linkedin.com/in/m13v/'
        'https://www.linkedin.com/in/m13v'
        '/in/m13v/'
        'm13v'

    Before matching, the value is stripped of surrounding whitespace,
    percent-decoded, lowercased, and has trailing slashes removed.

    Returns the lowercase slug, or '' when the input is empty or matches
    neither the ``/in/<slug>`` shape nor a bare slug.
    """
    if not author_url_or_slug:
        return ""

    cleaned = author_url_or_slug.strip()
    cleaned = urllib.parse.unquote(cleaned)
    cleaned = cleaned.lower().rstrip("/")

    # Profile URL or path: the slug is the segment right after /in/.
    in_path = re.search(r"/in/([a-z0-9\-_]+)", cleaned)
    if in_path is not None:
        return in_path.group(1)

    # No /in/ segment: accept the input only if it already is a bare slug.
    return cleaned if re.fullmatch(r"[a-z0-9\-_]+", cleaned) else ""
68
+
69
+
70
def is_self_author(author_url_or_slug):
    """Report whether an author URL/slug belongs to one of our own accounts.

    The identifier is reduced to a slug with extract_slug() and looked up
    in SELF_LINKEDIN_SLUGS. Pipeline discovery uses this to skip posts that
    we authored ourselves.
    """
    slug = extract_slug(author_url_or_slug)
    return slug in SELF_LINKEDIN_SLUGS
74
+
75
+
76
def extract_ids(url):
    """Collect every 16-19 digit ID in a URL, first-seen order, no repeats.

    The URL is percent-decoded before scanning, so an encoded value such as
    commentUrn=urn%3Ali%3Aactivity%3A... contributes its IDs as well. IDs
    are picked up wherever they sit in the path or query string, which
    covers activity, share, ugcPost, and comment URNs alike.

    Returns [] for an empty or None URL.
    """
    if not url:
        return []
    text = urllib.parse.unquote(url)
    # dict keys preserve insertion order, which gives an ordered dedup.
    return list(dict.fromkeys(ID_RE.findall(text)))
93
+
94
+
95
def canonicalize(url):
    """Produce a stable form of a LinkedIn post URL.

    When the percent-decoded URL contains an explicit activity URN, the
    result is the /feed/update/urn:li:activity:<id>/ permalink for that ID.
    Otherwise the original URL is returned with its query string and
    fragment removed, which keeps /posts/... slugs intact while dropping
    tracking parameters. Used for the our_url column so the
    activity-comment permalink doesn't drift between runs.

    A falsy input (None or '') is returned unchanged.
    """
    if not url:
        return url

    urn_match = ACTIVITY_URN_RE.search(urllib.parse.unquote(url))
    if urn_match is None:
        # Fallback: keep scheme, host, and path; blank out query + fragment.
        parts = urllib.parse.urlsplit(url)
        stripped = (parts.scheme, parts.netloc, parts.path, "", "")
        return urllib.parse.urlunsplit(stripped)

    activity_id = urn_match.group(1)
    return f"https://www.linkedin.com/feed/update/urn:li:activity:{activity_id}/"
110
+
111
+
112
def find_existing_engagement(ids):
    """Look up the first existing posts row that collides with any given ID.

    A collision means the row mentions one of the IDs in posts.urns
    (primary, GIN-indexed) or in thread_url / our_url (fallback for any
    row missed by backfill). The urns path covers the case where one
    logical post surfaces under different URN forms: the search-page DOM
    may expose the ugcPost URN while our DB stored only the activity URN,
    so storing every URN ever seen for a post lets a single
    ``urns && ARRAY[...]`` overlap query catch the collision.

    Since the 2026-06-01 migration the collision query runs server-side
    behind the s4l.ai HTTP API (GET /api/v1/linkedin-engaged?ids=...), so
    no DATABASE_URL is needed here.

    Returns None when ``ids`` is empty or nothing overlaps; otherwise the
    response's ``match`` value, a dict with keys post_id, posted_at,
    thread_url, our_url, our_account.
    """
    if not ids:
        return None

    query = {"ids": ",".join(ids)}
    payload = api_get("/api/v1/linkedin-engaged", query).get("data") or {}
    if payload.get("engaged"):
        return payload.get("match")
    return None
139
+
140
+
141
def get_engaged_ids():
    """List every LinkedIn ID we have engaged with, sorted.

    An ID is anything 16-19 digits long found in the thread_url or our_url
    of a platform='linkedin' row. The result is used to brief the LLM in
    run-linkedin.sh.

    Since the 2026-06-01 migration the API hands back raw
    (thread_url, our_url) pairs and the ID extraction stays single-sourced
    in extract_ids() rather than being re-implemented as a Postgres regexp.
    """
    resp = api_get("/api/v1/linkedin-engaged", {"list_urls": 1})
    rows = (resp.get("data") or {}).get("urls") or []
    # Pool the IDs from both URL columns of every row; the set dedups them.
    found = {
        engaged_id
        for row in rows
        for column in ("thread_url", "our_url")
        for engaged_id in extract_ids(row.get(column) or "")
    }
    return sorted(found)
159
+
160
+
161
def _exit_with_engagement(report, ids):
    """Print the engagement report as JSON and exit 0 on collision, else 1.

    ``report`` carries any leading keys (e.g. the input URL); the ids,
    engaged flag, and the match (only when present) are appended after them.
    """
    match = find_existing_engagement(ids)
    report["ids"] = ids
    report["engaged"] = bool(match)
    if match:
        report["match"] = match
    print(json.dumps(report, indent=2))
    sys.exit(0 if match else 1)


def main():
    """CLI entry point: run the first flag supplied, in declaration order.

    --extract and --canonicalize print their result and return normally.
    --check-engaged, --check-engaged-ids, and --check-self-author exit 0
    on a hit and 1 otherwise. --list-engaged-ids prints one ID per line.
    With no flag at all, the help text is printed and the exit code is 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--extract", help="Print all IDs found in URL")
    parser.add_argument("--canonicalize", help="Print the canonical form of URL")
    parser.add_argument(
        "--check-engaged",
        help=(
            "Check if URL collides with any "
            "existing linkedin row. Exits 0 on collision, 1 otherwise."
        ),
    )
    parser.add_argument(
        "--check-engaged-ids",
        help=(
            "Comma- or whitespace-separated "
            "list of LinkedIn URN IDs (16-19 digits each) extracted "
            "from a candidate post's DOM. Pre-comment dedup primary path: "
            "the URL bar may only carry the share URN while our DB rows "
            "store the activity URN, so the browser-side script must "
            "walk componentkey/data-testid for ALL URNs and pipe them in. "
            "Exits 0 on collision, 1 otherwise."
        ),
    )
    parser.add_argument(
        "--list-engaged-ids",
        action="store_true",
        help="Print every linkedin ID we've engaged with, one per line.",
    )
    parser.add_argument(
        "--check-self-author",
        help=(
            "Author profile URL or "
            "public-ID slug from a candidate post. Exits 0 if it "
            "matches one of our own LinkedIn accounts (skip the "
            "post), 1 otherwise (proceed). Pre-comment guard so "
            "the pipeline doesn't comment on Matthew's own posts "
            "when search results surface them."
        ),
    )
    args = parser.parse_args()

    if args.extract:
        print(json.dumps(extract_ids(args.extract)))
    elif args.canonicalize:
        print(canonicalize(args.canonicalize))
    elif args.check_engaged:
        _exit_with_engagement(
            {"url": args.check_engaged}, extract_ids(args.check_engaged)
        )
    elif args.check_engaged_ids:
        # Accept comma, whitespace, or newline separation. Keep only 16-19
        # digit numeric IDs so ad campaign mcid values or other noise the
        # browser-side walker might pick up never reach the lookup.
        tokens = re.split(r"[,\s]+", args.check_engaged_ids.strip())
        candidate_ids = [
            tok for tok in tokens if re.fullmatch(r"\d{16,19}", tok or "")
        ]
        _exit_with_engagement({}, candidate_ids)
    elif args.check_self_author:
        slug = extract_slug(args.check_self_author)
        is_ours = slug in SELF_LINKEDIN_SLUGS
        summary = {
            "input": args.check_self_author,
            "slug": slug,
            "self": is_ours,
        }
        print(json.dumps(summary))
        sys.exit(0 if is_ours else 1)
    elif args.list_engaged_ids:
        for engaged_id in get_engaged_ids():
            print(engaged_id)
    else:
        # No recognised flag: show usage and signal misuse.
        parser.print_help()
        sys.exit(2)


if __name__ == "__main__":
    main()