@m13v/s4l 1.6.197-rc.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1336 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +513 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,1084 @@
1
+ #!/usr/bin/env python3
2
+ """Per-DM short link minting + resolution for outbound link tracking.
3
+
4
+ All outbound URLs in the DM-replies pipeline get wrapped through this tool so
5
+ clicks attribute to the originating DM. Booking links, GitHub repos, our own
6
+ website pages, third-party references — every URL we send goes through /r/<code>.
7
+
8
+ Subcommands:
9
+
10
+ mint --dm-id N --target-url URL
11
+ Idempotent on (dm_id, target_url). Returns a wrapped URL like
12
+ https://<target_project_website>/r/<code>. Refuses if URL points at a
13
+ project not in dms.target_projects[]; the caller must call
14
+ `dm_conversation.py set-target-project --append --project NAME` first.
15
+ Auto-stamps dms.booking_link_sent_at for kind='booking'.
16
+
17
+ resolve --code CODE
18
+ Used by the public /api/short-links/<code> endpoint. Bumps clicks,
19
+ stamps first/last click timestamps, inserts a synthetic [CLICK_SIGNAL]
20
+ row in dm_messages so the engage pipeline picks the thread up. Returns
21
+ target_url + dm_id + project + platform.
22
+
23
+ wrap-text --dm-id N --text "..."
24
+ Find every URL in the text, mint each via the same path, substring-replace
25
+ the original URLs with the wrapped versions. Prints the wrapped text on
26
+ stdout. Used by reddit_browser.py / twitter_browser.py (via direct import
27
+ of `wrap_text()`) and by the LinkedIn shell flow (subprocess).
28
+
29
+ The classifier maps a URL to (kind, matched_project_name) using config.json:
30
+ - booking : URL starts with project.booking_link
31
+ - github : URL starts with project.github or matches project.landing_pages.github_repo
32
+ - website : URL host == project.website host
33
+ - other : no project match (no project guard, kind='other')
34
+
35
+ Wrapped hostname is always the DM's primary `target_project.website` (consistent
36
+ per thread regardless of which project a given link points at).
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import argparse
42
+ import json
43
+ import os
44
+ import re
45
+ import secrets
46
+ import sys
47
+ import uuid
48
+ from urllib.parse import urlencode, urlsplit, urlunsplit, parse_qsl
49
+
50
# Repository root: this file lives one directory below it (in scripts/).
REPO_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Make sibling helper modules under scripts/ importable by bare name.
sys.path.insert(0, os.path.join(REPO_DIR, 'scripts'))

# Transport note: every read/write in this module goes through the s4l.ai
# HTTP API (scripts/http_api.py). The direct-Postgres lane was removed on
# 2026-06-01, so there is no `import db` / get_conn() path any more, neither
# as the primary route nor as a fallback.

# Project definitions (websites, booking links, GitHub repos) are read from here.
CONFIG_PATH = os.path.join(REPO_DIR, 'config.json')
# Characters short-link codes are drawn from, and how many are drawn per code.
CODE_ALPHABET = 'abcdefghijkmnpqrstuvwxyz23456789'
CODE_LEN = 8

# Wrapper host used when a project's own /r/<code> redirector is NOT live
# (config.json short_links_live=false) and the operator has not pinned an
# explicit short_links_host. The s4l.ai resolver lives at
# @m13v/seo-components -> app.s4l.ai/api/short-links/<code> and is the
# social-autoposter-owned fallback. Routing through it keeps first-party
# click logging in post_link_clicks instead of dropping to UTM-only.
DEFAULT_FALLBACK_HOST = 'https://s4l.ai'
68
+
69
+ # Match http(s) URLs AND bare-domain references with a path. The bare-domain
70
+ # branch requires at least one path character so we don't match prose like
71
+ # "i.e." or "S.F." or version numbers. Greedy on the path; trailing punctuation
72
+ # is stripped by the caller. Both branches are normalized through
73
+ # _ensure_scheme() before classification.
74
+ #
75
+ # Third branch (added 2026-05-10): bare project hostnames with NO path. Built
76
+ # dynamically from config.json project websites + booking_link + github hosts.
77
+ # A 7d audit found 47/2094 Reddit DMs and 7/319 X DMs mention a project URL,
78
+ # but ZERO short links got minted because the model casually drops domains
79
+ # like "fazm.ai is the link" or "main one is fazm, ai agent for macos,
80
+ # github.com/m13v/fazm" without https:// or trailing path. Branches 1 and 2
81
+ # both miss those, so we never wrap them. The new branch matches a known
82
+ # project host as a bare token, with a negative lookahead so it doesn't
83
+ # overlap with branch 2 ('fazm.ai/path' still goes through branch 2).
84
def _build_project_bare_host_pattern(config_path=None):
    """Build a regex alternation of known project hostnames, longest-first.

    Collects the host of every project's ``website``, ``booking_link`` and
    ``github`` config field (lower-cased, port removed, a literal leading
    ``www.`` removed) and returns regex source that matches any of them as a
    bare token.

    Args:
        config_path: Optional path to the JSON config file. Defaults to the
            module-level ``CONFIG_PATH``.

    Returns:
        The regex source string, or ``None`` when the config cannot be read
        or yields no usable hostname.
    """
    path = CONFIG_PATH if config_path is None else config_path
    try:
        with open(path, 'r') as f:
            cfg = json.load(f)
    except Exception:
        # Deliberately best-effort: this is called while the module is being
        # imported, so an unreadable config must degrade to "no third regex
        # branch" rather than break the import.
        return None

    projs = cfg.get('projects') if isinstance(cfg, dict) else None
    if not isinstance(projs, list):
        return None

    hosts = set()
    for p in projs:
        # Malformed entries are skipped instead of raising at import time.
        if not isinstance(p, dict):
            continue
        for field in ('website', 'booking_link', 'github'):
            v = p.get(field)
            if not isinstance(v, str):
                continue
            v = v.strip()
            if not v:
                continue
            try:
                netloc = urlsplit(v if '://' in v else 'https://' + v).netloc
            except ValueError:
                continue
            host = (netloc or '').lower().split(':', 1)[0]
            # Strip a literal 'www.' prefix only (lstrip would chew chars).
            if host.startswith('www.'):
                host = host[4:]
            if host and '.' in host:
                hosts.add(host)
    if not hosts:
        return None

    # Longest first so a longer host wins over one that is its prefix; the
    # secondary alphabetical key makes the generated pattern deterministic.
    parts = sorted((re.escape(h) for h in hosts), key=lambda s: (-len(s), s))
    # \b on left, narrow lookahead on right. Reject:
    #   - word chars/slashes (mid-token or path -> branch 2 territory)
    #   - dot+letter (sub-domain extension: 'runner.now.example.com' must NOT
    #     match 'runner.now')
    # ALLOW dot+non-letter (sentence-ending: 'try fazm.ai.' must match) and
    # plain punctuation/whitespace. Pre-2026-05-14 this was `(?![\w./])` which
    # over-rejected sentence-ending periods, so 'try fazm.ai.' yielded ZERO
    # matches and the URL went out bare.
    return r'\b(?:' + '|'.join(parts) + r')\b(?![\w/]|\.[a-z])'
120
+
121
# Regex source for bare project hostnames, or None when config.json gave none.
_PROJECT_BARE_HOST_PAT = _build_project_bare_host_pattern()

# URL matcher: alternation of up to three branches, tried left to right.
_URL_RE = re.compile(
    '|'.join(
        branch
        for branch in (
            # 1. Explicit http(s) URL.
            r'https?://[^\s<>"\']+',
            # 2. Bare domain followed by a slash and optional path.
            r'(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z]{2,}/[^\s<>"\']*',
            # 3. Bare known project hostname (omitted when unavailable).
            _PROJECT_BARE_HOST_PAT,
        )
        if branch
    ),
    flags=re.IGNORECASE,
)

# Characters trimmed from the right end of a raw match before it is treated
# as a URL.
_TRAILING_PUNCT = '.,;:!?)]}>\'"'
132
+
133
+
134
def _ensure_scheme(url: str) -> str:
    """Return ``url`` with an http(s) scheme, prepending ``https://`` if absent.

    The scheme test is case-insensitive because ``_URL_RE`` is compiled with
    ``re.IGNORECASE``: a match such as ``HTTPS://example.com`` already has a
    scheme and must not become ``https://HTTPS://example.com``. An existing
    http/https scheme is lower-cased so later case-sensitive prefix
    comparisons see a canonical form; the rest of the URL is left untouched.
    """
    scheme, sep, rest = url.partition('://')
    if sep and scheme.lower() in ('http', 'https'):
        return scheme.lower() + sep + rest
    return 'https://' + url
141
+
142
+
143
def _load_projects(config_path=None):
    """Return the project dicts from the JSON config that have a truthy name.

    Args:
        config_path: Optional path to the JSON config file. Defaults to the
            module-level ``CONFIG_PATH``.

    Returns:
        A list of project dicts. A missing or null ``projects`` key yields an
        empty list, and entries that are not dicts are skipped.

    Raises:
        OSError: the config file cannot be opened.
        ValueError: the config file is not valid JSON.
    """
    path = CONFIG_PATH if config_path is None else config_path
    with open(path, 'r') as f:
        cfg = json.load(f)
    # `or []` covers an explicit `"projects": null`, which .get()'s default
    # argument would not.
    entries = cfg.get('projects') or []
    return [p for p in entries if isinstance(p, dict) and p.get('name')]
146
+
147
+
148
def _gen_code(n=CODE_LEN):
    """Return ``n`` characters drawn independently from ``CODE_ALPHABET``.

    Uses ``secrets.choice`` for each character.
    """
    picks = [secrets.choice(CODE_ALPHABET) for _ in range(n)]
    return ''.join(picks)
150
+
151
+
152
def _norm_host(url: str) -> str:
    """Return the lower-cased network location of ``url`` without a ``www.`` prefix.

    Returns an empty string when ``url`` has no network location or cannot be
    parsed. Any port or userinfo present in the netloc is kept as-is.
    """
    try:
        host = (urlsplit(url).netloc or '').lower()
    except (ValueError, TypeError, AttributeError):
        return ''
    # Remove the literal 'www.' prefix only. str.lstrip('www.') treats its
    # argument as a set of characters and would also eat the leading 'w' of
    # hosts such as 'wikipedia.org' or 'web.dev'.
    if host.startswith('www.'):
        host = host[4:]
    return host
157
+
158
+
159
def _classify_url(url: str, projects: list) -> tuple[str, str | None]:
    """Classify ``url`` as ``(kind, project_name)``; ``('other', None)`` if unmatched.

    Kinds are checked in priority order booking > github > website. Within a
    kind, the project whose configured value is longest wins, so e.g.
    cal.com/team/mediar/fazm beats a hypothetical cal.com/team/mediar/ root.
    Bare-domain inputs are given an https:// scheme before comparison.
    """
    normalized = _ensure_scheme(url.strip())
    # kind -> (longest configured value matched so far, owning project name)
    winners = {
        'booking': ('', None),
        'github': ('', None),
        'website': ('', None),
    }

    def _offer(kind, candidate, owner):
        # Keep the candidate only if it is strictly longer than the current best.
        if len(candidate) > len(winners[kind][0]):
            winners[kind] = (candidate, owner)

    for proj in projects:
        owner = proj.get('name')
        if not owner:
            continue

        # Booking: prefix match against the link minus a trailing '?' / '/'.
        booking_link = (proj.get('booking_link') or '').strip()
        if booking_link:
            booking_prefix = booking_link.rstrip('?').rstrip('/')
            if normalized.startswith(booking_prefix):
                _offer('booking', booking_link, owner)

        # GitHub: either the explicit `github` URL ...
        github_link = (proj.get('github') or '').strip()
        if github_link and normalized.startswith(github_link.rstrip('/')):
            _offer('github', github_link, owner)

        # ... or the repo slug configured under landing_pages.github_repo.
        landing = proj.get('landing_pages', {}) or {}
        repo_slug = landing.get('github_repo')
        if repo_slug:
            repo_url = f'https://github.com/{repo_slug.strip("/")}'
            if normalized.startswith(repo_url):
                _offer('github', repo_url, owner)

        # Website: same host as the project's site, or a sub-domain of it.
        site = (proj.get('website') or '').strip()
        if site:
            site_host = _norm_host(site)
            url_host = _norm_host(normalized)
            if site_host and url_host:
                same_host = url_host == site_host
                if same_host or url_host.endswith('.' + site_host):
                    _offer('website', site_host, owner)

    for kind in ('booking', 'github', 'website'):
        owner = winners[kind][1]
        if owner:
            return (kind, owner)
    return ('other', None)
208
+
209
+
210
+ def _build_target_url(target_url: str, kind: str, *, dm_id: int, project: str | None, platform: str) -> str:
211
+ """Add UTM params for kinds where we control the analytics consumer.
212
+
213
+ Canonical UTM scheme (matches _build_target_url_for_post + the pool
214
+ minters): utm_source='s4l' identifies the agency for every customer's
215
+ analytics ('this traffic came from S4L'). utm_term carries the platform
216
+ (reddit | twitter | linkedin | github_issues) since utm_source is no
217
+ longer platform-specific. utm_medium stays 'dm' to keep the DM rail
218
+ distinct from posts. utm_content keeps the strict 'dm_<id>' shape
219
+ consumed by bin/server.js (regex /^dm_(\\d+)$/) and project_stats_json
220
+ (LIKE 'dm_%').
221
+
222
+ Booking: Cal.com metadata[utm_*] survives to the booking webhook (the flat
223
+ utm_* gets stripped by Cal's UI), Calendly accepts both — keep both.
224
+ Website: our own domains run PostHog; flat utm_* is enough.
225
+ Github / other: leave the URL untouched (no downstream UTM consumer).
226
+ """
227
+ if kind not in ('booking', 'website'):
228
+ return target_url
229
+
230
+ parts = urlsplit(target_url)
231
+ existing = dict(parse_qsl(parts.query, keep_blank_values=True))
232
+
233
+ utm = {
234
+ 'utm_source': 's4l',
235
+ 'utm_medium': 'dm',
236
+ 'utm_campaign': (project or 'unknown').lower(),
237
+ 'utm_term': (platform or 'unknown').lower(),
238
+ 'utm_content': f'dm_{dm_id}',
239
+ }
240
+ for k, v in utm.items():
241
+ existing.setdefault(k, v)
242
+ if kind == 'booking':
243
+ existing[f'metadata[{k}]'] = v
244
+
245
+ new_query = urlencode(existing, doseq=True)
246
+ return urlunsplit((parts.scheme, parts.netloc, parts.path, new_query, parts.fragment))
247
+
248
+
249
+ def _build_target_url_for_post(target_url: str, kind: str, *, minted_session: str,
250
+ project: str | None, platform: str) -> str:
251
+ """UTM stamping for PUBLIC post wrappers (utm_medium='post').
252
+
253
+ See _build_target_url for the canonical UTM scheme rationale. utm_content
254
+ keeps the 'post_<session>' shape so backfill_real_clicks.py can
255
+ PostHog-join on it.
256
+ """
257
+ if kind not in ('booking', 'website'):
258
+ return target_url
259
+
260
+ parts = urlsplit(target_url)
261
+ existing = dict(parse_qsl(parts.query, keep_blank_values=True))
262
+
263
+ utm = {
264
+ 'utm_source': 's4l',
265
+ 'utm_medium': 'post',
266
+ 'utm_campaign': (project or 'unknown').lower(),
267
+ 'utm_term': (platform or 'unknown').lower(),
268
+ 'utm_content': f'post_{minted_session}',
269
+ }
270
+ for k, v in utm.items():
271
+ existing.setdefault(k, v)
272
+ if kind == 'booking':
273
+ existing[f'metadata[{k}]'] = v
274
+
275
+ new_query = urlencode(existing, doseq=True)
276
+ return urlunsplit((parts.scheme, parts.netloc, parts.path, new_query, parts.fragment))
277
+
278
+
279
+ def _project_website(projects: list, name: str) -> str | None:
280
+ for p in projects:
281
+ if p.get('name') == name:
282
+ site = (p.get('website') or '').strip().rstrip('/')
283
+ return site or None
284
+ return None
285
+
286
+
287
def _project_short_links_live(projects: list, name: str) -> bool:
    """Report whether the project's OWN domain serves /r/<code>.

    Reads the project's `short_links_live` flag from config. The flag
    defaults to true, both when it is unset and when no project is named
    `name`. That preserves behavior for fazm, mediar, assrt, cyrano-systems
    and every other existing project where the customer's domain hosts the
    @m13v/seo-components /r/[code] handler.

    Set it to false in config.json for projects where the customer owns the
    domain but hasn't shipped the resolver (or the static CSV) yet. In that
    case the wrapper auto-routes through DEFAULT_FALLBACK_HOST (s4l.ai), so
    mints still produce a live /r/<code> with first-party click logging; we
    no longer drop to UTM-only. See _project_short_links_host for the
    host-resolution order.

    An explicit `short_links_host` in config.json (regardless of this flag)
    always wins and is used verbatim.
    """
    for proj in projects:
        if proj.get('name') != name:
            continue
        flag = proj.get('short_links_live')
        if flag is None:
            return True
        return bool(flag)
    return True
308
+
309
+
310
+ def _project_short_links_host(projects: list, name: str) -> str | None:
311
+ """Resolve the wrapper host where /r/<code> is served for this project.
312
+
313
+ Resolution order (first match wins):
314
+ 1. Explicit `short_links_host` in config.json (e.g. "https://s4l.ai").
315
+ Used to pin a project to a specific resolver-bearing host we operate.
316
+ 2. DEFAULT_FALLBACK_HOST (= https://s4l.ai) when `short_links_live` is
317
+ explicitly false. Auto-applied so any project flagged as "customer
318
+ hasn't deployed the resolver yet" still gets a live /r/<code> through
319
+ the social-autoposter-owned resolver, instead of dropping to UTM-only.
320
+ 3. None → caller falls back to project.website (the legacy/default path,
321
+ used when short_links_live is unset/true, meaning the customer's own
322
+ domain has the @m13v/seo-components /r/[code] handler shipped).
323
+
324
+ Callers should always do: `_project_short_links_host(p, name) or website`.
325
+
326
+ The underlying target_url (where the resolver 302s) is unchanged in either
327
+ case — it still points at the customer's site with full UTMs baked in at
328
+ mint time. Only the wrapper host changes.
329
+ """
330
+ for p in projects:
331
+ if p.get('name') == name:
332
+ host = (p.get('short_links_host') or '').strip().rstrip('/')
333
+ if host:
334
+ return host
335
+ if p.get('short_links_live') is False:
336
+ return DEFAULT_FALLBACK_HOST
337
+ return None
338
+ return None
339
+
340
+
341
def utm_only_text(*, text: str, platform: str, project_name: str) -> str:
    """Replace every URL in ``text`` with its UTM-tagged form (no minting, no DB).

    Safety-net helper for caller exception branches so a bare URL never
    escapes when wrap_text_for_post itself raises. URLs that already look
    like a /r/<code> short link are left as they are. Empty or falsy
    ``text`` is returned unchanged. Platform 'x' is treated as 'twitter'.
    """
    if not text:
        return text

    platform = (platform or '').lower()
    if platform == 'x':
        platform = 'twitter'
    # One session id is shared by every URL rewritten in this call.
    minted_session = str(uuid.uuid4())
    projects = _load_projects()
    # Punctuation-stripped URL -> replacement, so repeats are computed once.
    rewritten: dict[str, str] = {}

    def _tagged(bare):
        if bare in rewritten:
            return rewritten[bare]
        if re.search(r'/r/[a-z0-9]{4,32}(?:[/?#]|$)', bare, re.IGNORECASE):
            # Already a short link: pass through untouched.
            result = bare
        else:
            absolute = _ensure_scheme(bare)
            kind, matched_project = _classify_url(absolute, projects)
            result = _build_target_url_for_post(
                absolute, kind, minted_session=minted_session,
                project=matched_project or project_name, platform=platform,
            )
        rewritten[bare] = result
        return result

    def _swap(match):
        token = match.group(0)
        bare = token.rstrip(_TRAILING_PUNCT)
        # Re-attach whatever trailing punctuation was trimmed off the match.
        return _tagged(bare) + token[len(bare):]

    return _URL_RE.sub(_swap, text)
377
+
378
+
379
def _dm_row(dm_id: int):
    """Load the DM header via GET /api/v1/dms/<id>.

    HTTP-only: there is no direct-Postgres path. A failed response or a
    response without a ``data.dm`` payload raises SystemExit, matching the
    prior DB behaviour.
    """
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_get

    payload = api_get(f"/api/v1/dms/{dm_id}", ok_on_404=True)
    header = {}
    if payload and payload.get('ok'):
        header = (payload.get('data') or {}).get('dm') or {}
    if header:
        return header
    raise SystemExit(f"DM #{dm_id} not found")
394
+
395
+
396
def _mint_one(*, dm_id: int, target_url: str, projects: list, projects_by_name: dict,
              dm: dict) -> dict:
    """Mint (or reuse) one DM short link; shared by `mint` CLI and `wrap_text`.

    HTTP-only: URL classification and UTM/booking target building happen
    locally, then the insert-or-reuse runs server-side through
    POST /api/v1/dm-links/mint. There is no direct-Postgres path.

    Returns one of:
      {ok: True, code, short_url, target_url, kind, project, reused: bool}
      {ok: False, error: "empty_url"}
      {ok: False, error: "target_project_required", needed_project, url, kind}
      {ok: False, error: "no_primary_website", dm_id, detail}
      {ok: False, error: "mint_api_unreachable", detail}
      {ok: False, error: <server error code> | "mint_api_error"}
      {ok: False, error: "code_collision_after_8_tries"}
    """
    cleaned_url = _ensure_scheme((target_url or '').strip())
    if not cleaned_url or cleaned_url == 'https://':
        return {'ok': False, 'error': 'empty_url'}

    channel = (dm.get('platform') or 'reddit').lower()
    channel = 'twitter' if channel == 'x' else channel

    kind, matched_project = _classify_url(cleaned_url, projects)

    # Target-project guard: a URL that maps to one of our projects is only
    # allowed when that project is already in the DM's target_projects[].
    # The caller is expected to run set-target-project --append and retry.
    # URLs that match no project skip the guard.
    allowed_projects = dm.get('target_projects') or []
    if matched_project and matched_project not in allowed_projects:
        return {
            'ok': False,
            'error': 'target_project_required',
            'needed_project': matched_project,
            'url': cleaned_url,
            'kind': kind,
        }

    # Wrapper hostname: the DM's primary target_project website, falling back
    # to the matched project when target_project is unset. A project-level
    # short-links host (see _project_short_links_host) overrides the website.
    primary = dm.get('target_project') or matched_project or None
    website = _project_website(projects, primary) if primary else None
    if not website:
        return {
            'ok': False,
            'error': 'no_primary_website',
            'dm_id': dm_id,
            'detail': f"no website for project={primary!r}; set target_project first",
        }
    # `website` being truthy implies `primary` is truthy at this point.
    wrapper_host = _project_short_links_host(projects, primary) or website

    final_target = _build_target_url(
        cleaned_url,
        kind,
        dm_id=dm_id,
        project=matched_project,
        platform=channel,
    )

    # Insert-or-reuse happens server-side. Per the endpoint contract noted by
    # the original author, it matches first on the final (post-UTM)
    # target_url, then on the bare URL, and stamps dms.booking_link_sent_at
    # for kind='booking'.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    stamp_booking = bool(kind == 'booking' and not dm.get('booking_link_sent_at'))
    bare_url = cleaned_url if final_target != cleaned_url else None

    for _attempt in range(8):
        candidate = _gen_code()
        try:
            reply = api_post(
                "/api/v1/dm-links/mint",
                {
                    "dm_id": dm_id,
                    "code": candidate,
                    "target_url": final_target,
                    "bare_url": bare_url,
                    "kind": kind,
                    "project_at_mint": matched_project,
                    "stamp_booking": stamp_booking,
                },
                ok_on_conflict=True,
            )
        except Exception as exc:
            return {'ok': False, 'error': 'mint_api_unreachable', 'detail': str(exc)}

        if reply and reply.get('ok'):
            data = reply.get('data') or {}
            # Prefer the code returned by the server over our candidate.
            final_code = data.get('code') or candidate
            return {
                'ok': True,
                'code': final_code,
                'short_url': f"{wrapper_host}/r/{final_code}",
                'target_url': final_target,
                'kind': data.get('kind') or kind,
                'project': matched_project,
                'reused': bool(data.get('reused')),
            }

        failure = (reply or {}).get('error') or {}
        failure_code = failure.get('code') if isinstance(failure, dict) else None
        if failure_code != 'code_collision':
            return {'ok': False, 'error': failure_code or 'mint_api_error'}
        # code_collision: loop round and try another random code.

    return {'ok': False, 'error': 'code_collision_after_8_tries'}
500
+
501
+
502
+ # ---- Library entry point used by reddit_browser.py / twitter_browser.py ----
503
+
504
def wrap_text(*, dm_id: int, text: str) -> dict:
    """Find every URL in `text`, mint each, substring-replace.

    Returns:
      {ok: True, text: "<wrapped>", minted_codes: [...], skipped: [...]}
      {ok: False, error: "...", url: "...", needed_project: "..." }

    `minted_codes` holds the code of every wrapped URL, including reused
    codes, so callers can backfill message_id. The failure envelope is the
    `_mint_one` error dict plus `url` (the offending URL as found in the
    text) whenever `_mint_one` did not already supply one.

    On a target_project_required error, the caller should set-target-project
    --append the needed_project and retry. We DO NOT silently fall through —
    refusing here is the whole point of the multi-project guard.

    Raises SystemExit (from `_dm_row`) when the DM cannot be fetched.
    """
    if not text:
        return {'ok': True, 'text': text, 'minted_codes': [], 'skipped': []}

    projects = _load_projects()
    # Skip nameless config entries rather than raising KeyError; the rest of
    # this module looks projects up with p.get('name').
    projects_by_name = {p['name']: p for p in projects if p.get('name')}
    dm = _dm_row(dm_id)
    seen = {}  # original_url -> wrapped_url (dedup so identical URLs map once)
    minted_codes = []
    skipped = []

    # Iterate matches in order. Trailing punctuation common in prose
    # ("...github.com/foo.") is stripped from the URL before classify.
    for m in _URL_RE.finditer(text):
        stripped = m.group(0).rstrip(_TRAILING_PUNCT)
        if stripped in seen:
            continue

        # A URL already shaped like a wrapped /r/<code> link is left alone.
        if re.search(r'/r/[a-z0-9]{4,32}(?:[/?#]|$)', stripped, re.IGNORECASE):
            seen[stripped] = stripped
            skipped.append({'url': stripped, 'reason': 'already_wrapped'})
            continue

        res = _mint_one(
            dm_id=dm_id,
            target_url=stripped,
            projects=projects,
            projects_by_name=projects_by_name,
            dm=dm,
        )
        if not res.get('ok'):
            # `url` goes first so a url supplied by _mint_one still wins.
            return {'url': stripped, **res, 'ok': False}
        seen[stripped] = res['short_url']
        # Fresh and reused codes are both surfaced to the caller.
        code = res.get('code')
        if code:
            minted_codes.append(code)

    if not seen:
        return {'ok': True, 'text': text, 'minted_codes': [], 'skipped': skipped}

    # Re-walk the text and substitute. The regex is applied again so trailing
    # punctuation stays outside the URL (it was stripped before classify).
    def _sub(m):
        raw = m.group(0)
        stripped = raw.rstrip(_TRAILING_PUNCT)
        return seen.get(stripped, stripped) + raw[len(stripped):]

    return {
        'ok': True,
        'text': _URL_RE.sub(_sub, text),
        'minted_codes': minted_codes,
        'skipped': skipped,
    }
576
+
577
+
578
+ # ---- Post-link library (parallel rail to DM, table=post_links) ----------
579
+
580
def _mint_one_post(*, target_url: str, projects: list, platform: str,
                   project_name: str, minted_session: str) -> dict:
    """Core mint logic for public posts. Mirrors _mint_one but writes to
    post_links instead of dm_links, with post_id and reply_id BOTH NULL at
    mint time (the caller backfills exactly one of them after log_post or
    reply_db returns the row id).

    Returns:
      {ok: True, code, short_url, target_url, kind}                  fresh mint
      {ok: True, code, short_url, target_url, kind, from_pool: True} pool claim
      {ok: True, code: None, short_url, target_url, kind,
       utm_only: True, fallback_reason}                              UTM fallback
      {ok: False, error: 'no_primary_website' | 'empty_url', ...}

    Runtime failures of the mint/pool API (unreachable, error response,
    8 consecutive code collisions, pool claim without a code) do NOT return
    ok=False: they degrade to the UTM-only shape above.

    External-short-links path: if the project's config.json entry has
    external_short_links=true, we don't mint a fresh code, we CLAIM one from
    the pre-minted pool (post_links rows where minted_session starts with
    'pool:' and post_id IS NULL and reply_id IS NULL). The pool exists so we
    can hand the client a STATIC CSV they host on their own domain redirector;
    if we minted fresh codes for these projects the CSV would go stale every
    cycle. The pool's target_url is fixed at pool-mint time (homepage with
    platform UTMs + code in utm_content), so the LLM's URL in the comment text
    is ignored for routing -- visitors always land on the destination we baked
    in. Pool depth managed by scripts/mint_external_pool.py.

    HTTP-only: all DB ops run server-side via /api/v1/post-links/* (mint +
    claim-pool). There is no direct-Postgres path.
    """
    target_url = _ensure_scheme((target_url or '').strip())
    if not target_url or target_url == 'https://':
        return {'ok': False, 'error': 'empty_url'}

    kind, matched_project = _classify_url(target_url, projects)

    # Wrapper hostname comes from the project we're posting AS, not from any
    # URL classification. A project-level short-links host overrides the
    # project's own website; the underlying target_url is unchanged.
    website = _project_website(projects, project_name)
    if not website:
        return {
            'ok': False,
            'error': 'no_primary_website',
            'project': project_name,
            'detail': f"no website for project={project_name!r} in config.json",
        }
    host_override = _project_short_links_host(projects, project_name)
    wrapper_host = host_override or website

    # Normalise once and use the normalised value everywhere below (UTM
    # builder, pool claim, fresh mint) so 'x' / 'X' never reaches the API or
    # the UTM parameters un-collapsed.
    platform_norm = (platform or '').lower()
    if platform_norm == 'x':
        platform_norm = 'twitter'

    project_cfg = next((p for p in projects if p.get('name') == project_name), None)

    # UTM URL is the universal fallback, used when pool/mint can't produce a
    # /r/<code> for any reason. No DB row is created in fallback mode;
    # attribution then relies on the UTM parameters baked in here.
    fallback_target = _build_target_url_for_post(
        target_url,
        kind,
        minted_session=minted_session,
        project=matched_project or project_name,
        platform=platform_norm,
    )

    def _utm_fallback(reason: str) -> dict:
        return {
            'ok': True,
            'code': None,
            'short_url': fallback_target,
            'target_url': fallback_target,
            'kind': kind,
            'utm_only': True,
            'fallback_reason': reason,
        }

    # Historically there was a UTM-fallback gate here for short_links_live=false
    # projects, but _project_short_links_host now auto-returns DEFAULT_FALLBACK_HOST
    # in that case, so we always have a live wrapper host and can mint.
    # The remaining _utm_fallback paths below are runtime failures of the mint
    # API / pool itself, where UTM is the genuine last resort.

    # Opt-in policy override: a project may set `force_utm_only: true` in
    # config.json to deliberately post UTM-tagged bare URLs instead of minting
    # a /r/<code> short link. Trade-off: no post_links row is created;
    # attribution still works via the UTM scheme already applied to
    # `fallback_target`.
    if project_cfg and project_cfg.get('force_utm_only'):
        return _utm_fallback('policy')

    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    if project_cfg and project_cfg.get('external_short_links'):
        # Pool path: claim a pre-minted pool row server-side.
        try:
            resp = api_post(
                "/api/v1/post-links/claim-pool",
                {
                    "project_name": project_name,
                    "platform": platform_norm,
                    "minted_session": minted_session,
                },
                ok_on_conflict=True,
            )
        except Exception:
            return _utm_fallback('api_unreachable')
        if not resp or not resp.get('ok'):
            err = (resp or {}).get('error') or {}
            err_code = err.get('code') if isinstance(err, dict) else None
            return _utm_fallback(err_code or 'pool_exhausted')
        data = resp.get('data') or {}
        pool_code = data.get('code')
        if not pool_code:
            # An "ok" response without a code would otherwise yield a dead
            # "<host>/r/None" link; degrade to the UTM URL instead.
            return _utm_fallback('pool_claim_missing_code')
        return {
            'ok': True,
            'code': pool_code,
            'short_url': f"{wrapper_host}/r/{pool_code}",
            'target_url': data.get('target_url'),
            'kind': 'website',
            'from_pool': True,
        }

    # Fresh mint: try up to 8 random codes before giving up on collision.
    for _ in range(8):
        code = _gen_code()
        try:
            resp = api_post(
                "/api/v1/post-links/mint",
                {
                    "code": code,
                    "platform": platform_norm,
                    "project_name": project_name,
                    "target_url": fallback_target,
                    "kind": kind,
                    "project_at_mint": matched_project,
                    "minted_session": minted_session,
                },
                ok_on_conflict=True,
            )
        except Exception:
            return _utm_fallback('api_unreachable')
        if resp and resp.get('ok'):
            return {
                'ok': True,
                'code': code,
                'short_url': f"{wrapper_host}/r/{code}",
                'target_url': fallback_target,
                'kind': kind,
            }
        err = (resp or {}).get('error') or {}
        err_code = err.get('code') if isinstance(err, dict) else None
        if err_code == 'code_collision':
            continue  # try another random code
        return _utm_fallback(err_code or 'mint_api_error')
    return _utm_fallback('code_collision_after_8_tries')
742
+
743
+
744
def wrap_text_for_post(*, text: str, platform: str, project_name: str) -> dict:
    """Find every URL in `text`, mint into post_links, substring-replace.

    Returns:
      {ok: True, text: <wrapped>, minted_session, codes: [...], skipped: [...]}
      {ok: False, error: ..., url: ...}

    The failure envelope is the `_mint_one_post` error dict plus `url` (the
    offending URL as found in the text) when the error dict has none.

    minted_session is a UUID the caller MUST pass to backfill_post_id /
    backfill_reply_id once the platform call returns the row id from
    log_post.py or reply_db.py. It is None when `text` is falsy or contains
    no URL. If the platform call fails, the codes are orphaned (post_id and
    reply_id stay NULL).

    URLs handled by the UTM-only fallback carry no code; they are reported in
    `skipped` with reason 'utm_fallback' instead of adding None to `codes`.

    Normalize platform: 'x' is collapsed to 'twitter' so analytics joins
    against posts.platform line up.
    """
    if not text:
        return {'ok': True, 'text': text, 'minted_session': None,
                'codes': [], 'skipped': []}

    platform = (platform or '').lower()
    if platform == 'x':
        platform = 'twitter'

    minted_session = str(uuid.uuid4())
    projects = _load_projects()
    seen = {}  # original_url -> replacement (dedup so identical URLs map once)
    codes = []
    skipped = []

    for m in _URL_RE.finditer(text):
        stripped = m.group(0).rstrip(_TRAILING_PUNCT)
        if stripped in seen:
            continue

        # A URL already shaped like a wrapped /r/<code> link is left alone.
        if re.search(r'/r/[a-z0-9]{4,32}(?:[/?#]|$)', stripped, re.IGNORECASE):
            seen[stripped] = stripped
            skipped.append({'url': stripped, 'reason': 'already_wrapped'})
            continue

        res = _mint_one_post(
            target_url=stripped,
            projects=projects,
            platform=platform,
            project_name=project_name,
            minted_session=minted_session,
        )
        if not res.get('ok'):
            # `url` goes first so a url supplied by the minter still wins.
            return {'url': stripped, **res, 'ok': False}
        seen[stripped] = res['short_url']
        if res.get('code') is not None:
            codes.append(res['code'])
        else:
            # UTM-only fallback (no /r/<code>): track in skipped[] so the
            # caller's logging doesn't see [None] in codes[] but still has
            # visibility into how the URL was handled.
            skipped.append({'url': stripped, 'reason': 'utm_fallback',
                            'detail': res.get('fallback_reason')})

    if not seen:
        return {'ok': True, 'text': text, 'minted_session': None,
                'codes': [], 'skipped': skipped}

    # Second pass re-attaches the punctuation trimmed before minting.
    def _sub(m):
        raw = m.group(0)
        stripped = raw.rstrip(_TRAILING_PUNCT)
        return seen.get(stripped, stripped) + raw[len(stripped):]

    return {
        'ok': True,
        'text': _URL_RE.sub(_sub, text),
        'minted_session': minted_session,
        'codes': codes,
        'skipped': skipped,
    }
824
+
825
+
826
def _backfill_via_api(*, minted_session: str, post_id: int | None = None,
                      reply_id: int | None = None) -> int:
    """POST to /api/v1/post-links/backfill and return the updated-row count.

    Only the ids that are not None are sent. Any exception raised by the API
    call, or a non-ok response, yields 0 rather than propagating.
    """
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    payload: dict = {"minted_session": minted_session}
    for key, value in (("post_id", post_id), ("reply_id", reply_id)):
        if value is not None:
            payload[key] = int(value)

    try:
        reply = api_post("/api/v1/post-links/backfill", payload)
    except Exception:
        return 0

    if reply and reply.get('ok'):
        return int((reply.get('data') or {}).get('updated') or 0)
    return 0
842
+
843
+
844
def backfill_post_id(*, minted_session: str, post_id: int) -> int:
    """Stamp post_links.post_id on every code minted under minted_session.

    Returns the number of rows the API reports as updated. A falsy
    minted_session or a None post_id is a no-op that returns 0 without any
    API call. Callers should not treat 0 as an error: posts without URLs
    have no minted_session to backfill.

    HTTP-only: routes through /api/v1/post-links/backfill. There is no
    direct-Postgres path and no fallback.
    """
    if minted_session and post_id is not None:
        return _backfill_via_api(minted_session=minted_session, post_id=post_id)
    return 0
858
+
859
+
860
def backfill_reply_id(*, minted_session: str, reply_id: int) -> int:
    """Stamp post_links.reply_id on every code minted under minted_session.

    Counterpart of backfill_post_id for rows logged in the `replies` table
    rather than `posts`. A falsy minted_session or a None reply_id returns 0
    without any API call. HTTP-only.
    """
    if minted_session and reply_id is not None:
        return _backfill_via_api(minted_session=minted_session, reply_id=reply_id)
    return 0
866
+
867
+
868
+ # ---- CLI subcommands ----
869
+
870
def cmd_mint(args):
    """CLI `mint`: mint (or reuse) one short link for --target-url.

    On failure the error envelope is written to stderr as JSON and the
    process exits with status 2. On success stdout gets the full JSON
    envelope with --json, otherwise just the short URL.
    """
    all_projects = _load_projects()
    outcome = _mint_one(
        dm_id=args.dm_id,
        target_url=args.target_url,
        projects=all_projects,
        projects_by_name={p['name']: p for p in all_projects},
        dm=_dm_row(args.dm_id),
    )
    if not outcome.get('ok'):
        sys.stderr.write(json.dumps(outcome) + '\n')
        sys.exit(2)
    print(json.dumps(outcome) if args.json else outcome['short_url'])
888
+
889
+
890
# Bot User-Agent regex. Covers Twitter card prefetch, LinkedIn unfurl,
# Slack/Discord/Telegram/WhatsApp link previews, generic Google/Bing crawlers,
# and Pinterest/Embedly/Snapchat. Background (original author's measurements):
# 97 percent of /r/<code> hits fired within 30 seconds of mint, average 17s,
# which is the link-preview fingerprint; the real human ratio cross-referenced
# against PostHog pageviews was 5-8 percent. When a UA matches:
#   1. Skip the legacy `clicks` counter increment (so post-2026-05-07 the
#      legacy column is humans-only).
#   2. Skip the [CLICK_SIGNAL] insert into dm_messages so the engage pipeline
#      isn't woken up by a Slackbot.
#   3. Still log a row in dm_link_clicks with is_bot=true so historical
#      splits stay accurate.
#   4. Still return target_url so previews render.
import hashlib
import re

# Alternatives are joined with '|' and matched case-insensitively anywhere in
# the User-Agent string.
_BOT_UA_TOKENS = (
    'bot', 'crawler', 'spider', 'Twitterbot', 'LinkedInBot', 'Slackbot',
    'facebookexternalhit', 'Discordbot', 'TelegramBot', 'WhatsApp',
    'Applebot', 'Googlebot', 'Bingbot', 'YandexBot', 'DuckDuckBot',
    'redditbot', 'Pinterest', 'Embedly', 'Snapchat',
)
BOT_UA_RE = re.compile('|'.join(_BOT_UA_TOKENS), re.IGNORECASE)
911
+
912
+
913
def cmd_resolve(args):
    """CLI `resolve`: look up a short code and print its target as JSON.

    HTTP-only: bot detection and IP hashing happen locally; click logging
    and the join read run server-side via POST /api/v1/dm-links/resolve.
    There is no direct-Postgres path. A missing or non-ok response prints
    {"error": "not_found", "code": ...} instead.
    """
    user_agent = (getattr(args, 'user_agent', '') or '').strip()
    referrer = (getattr(args, 'referrer', '') or '').strip() or None
    caller_ip = (getattr(args, 'ip', '') or '').strip()

    looks_like_bot = bool(user_agent and BOT_UA_RE.search(user_agent))
    # Only the first 16 hex chars of the SHA-256 digest are sent; the raw IP
    # is never included in the request body.
    hashed_ip = None
    if caller_ip:
        hashed_ip = hashlib.sha256(caller_ip.encode('utf-8')).hexdigest()[:16]

    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    request_body = {
        "code": args.code,
        "no_count": bool(args.no_count),
        "is_bot": looks_like_bot,
        "ip_hash": hashed_ip,
        "user_agent": user_agent or None,
        "referrer": referrer,
    }
    reply = api_post("/api/v1/dm-links/resolve", request_body, ok_on_404=True)
    if not reply or not reply.get('ok'):
        print(json.dumps({'error': 'not_found', 'code': args.code}))
        return

    data = reply.get('data') or {}
    result = {
        field: data.get(field)
        for field in ('dm_id', 'platform', 'project', 'kind', 'target_url')
    }
    # Fall back to the local verdict when the response has no is_bot field.
    result['is_bot'] = data.get('is_bot', looks_like_bot)
    print(json.dumps(result))
952
+
953
+
954
def cmd_wrap_text(args):
    """CLI `wrap-text`: wrap every URL in --text for the given DM.

    Failure: JSON error envelope on stderr, exit status 2.
    Success with --json: full JSON envelope on stdout.
    Success without --json: stdout carries the wrapped text only (ready to
    pipe into a `send` command or a shell variable); diagnostics, if any,
    go to stderr.
    """
    outcome = wrap_text(dm_id=args.dm_id, text=args.text)
    if not outcome.get('ok'):
        sys.stderr.write(json.dumps(outcome) + '\n')
        sys.exit(2)

    if args.json:
        print(json.dumps(outcome))
        return

    if outcome.get('minted_codes') or outcome.get('skipped'):
        diagnostics = {
            'minted_codes': outcome['minted_codes'],
            'skipped': outcome['skipped'],
        }
        sys.stderr.write(json.dumps(diagnostics) + '\n')
    sys.stdout.write(outcome['text'])
970
+
971
+
972
def cmd_wrap_post_text(args):
    """CLI `wrap-post-text`: wrap URLs in a public post/comment text.

    Always prints the JSON envelope on success, because callers need
    minted_session from it for the later backfill. On failure the error
    envelope goes to stderr and the process exits with status 2.
    """
    outcome = wrap_text_for_post(
        text=args.text,
        platform=args.platform,
        project_name=args.project,
    )
    if outcome.get('ok'):
        print(json.dumps(outcome))
        return
    sys.stderr.write(json.dumps(outcome) + '\n')
    sys.exit(2)
982
+
983
+
984
def cmd_utm_text(args):
    """CLI `utm-text`: UTM-only wrap, no short code minted.

    Writes the rewritten text to stdout with no trailing newline. Per the
    original author, this serves the Twitter engagement prompt, where the
    reply is typed through the browser MCP and no Python posting layer is
    available to call wrap_text_for_post; attribution then rides entirely
    on the UTM parameters of the typed URL.
    """
    tagged = utm_only_text(
        text=args.text,
        platform=args.platform,
        project_name=args.project,
    )
    sys.stdout.write(tagged)
994
+
995
+
996
def cmd_backfill_post(args):
    """CLI `backfill-post`: stamp post_id on a minted session, print a JSON summary."""
    updated = backfill_post_id(minted_session=args.minted_session, post_id=args.post_id)
    summary = {
        'backfilled': updated,
        'post_id': args.post_id,
        'minted_session': args.minted_session,
    }
    print(json.dumps(summary))
1000
+
1001
+
1002
def cmd_backfill_reply(args):
    """CLI `backfill-reply`: stamp reply_id on a minted session, print a JSON summary."""
    updated = backfill_reply_id(minted_session=args.minted_session, reply_id=args.reply_id)
    summary = {
        'backfilled': updated,
        'reply_id': args.reply_id,
        'minted_session': args.minted_session,
    }
    print(json.dumps(summary))
1006
+
1007
+
1008
def main():
    """Parse the command line and dispatch to the matching cmd_* handler."""
    parser = argparse.ArgumentParser(description=__doc__)
    commands = parser.add_subparsers(dest='cmd', required=True)

    # Platforms accepted by the two post-oriented subcommands.
    platform_choices = ['reddit', 'twitter', 'x', 'linkedin', 'github_issues', 'github', 'moltbook']

    mint = commands.add_parser('mint', help='Mint (or reuse) a wrapped /r/<code> short link for one URL')
    mint.add_argument('--dm-id', type=int, required=True)
    mint.add_argument('--target-url', required=True)
    mint.add_argument('--json', action='store_true', help='Print full JSON envelope')

    resolve = commands.add_parser('resolve', help='Look up code, increment clicks, return target URL')
    resolve.add_argument('--code', required=True)
    resolve.add_argument('--no-count', action='store_true', help='Skip click counter update (debugging)')
    # Bot detection inputs. When --user-agent matches the bot regex (Twitterbot,
    # LinkedInBot, Slackbot, facebookexternalhit, etc.), the legacy clicks
    # counter is NOT bumped, [CLICK_SIGNAL] is NOT inserted, but a row IS
    # appended to dm_link_clicks with is_bot=true so historical splits work.
    resolve.add_argument('--user-agent', default='', help='Caller User-Agent for bot detection')
    resolve.add_argument('--referrer', default='', help='Caller Referer header for analytics')
    resolve.add_argument('--ip', default='', help='Caller IP (sha256 hashed before storage)')

    wrap = commands.add_parser('wrap-text', help='Wrap every URL in TEXT through the mint pipeline')
    wrap.add_argument('--dm-id', type=int, required=True)
    wrap.add_argument('--text', required=True)
    wrap.add_argument('--json', action='store_true', help='Print full JSON envelope to stdout')

    wrap_post = commands.add_parser(
        'wrap-post-text',
        help='Wrap URLs in a public post/comment text. '
             'Mints into post_links with NULL post_id; '
             'backfill via backfill-post or backfill-reply.',
    )
    wrap_post.add_argument('--text', required=True)
    wrap_post.add_argument('--platform', required=True, choices=platform_choices)
    wrap_post.add_argument('--project', required=True,
                           help='project_name from config.json (drives wrapper hostname)')

    utm = commands.add_parser(
        'utm-text',
        help='UTM-only wrap (no DB write). Replaces every URL '
             'in --text with its UTM-tagged version and prints '
             'the result on stdout. Use when no Python posting '
             'layer is available (Claude-driven MCP typing).',
    )
    utm.add_argument('--text', required=True)
    utm.add_argument('--platform', required=True, choices=platform_choices)
    utm.add_argument('--project', required=True,
                     help='project_name from config.json (drives utm_campaign + wrapper hostname classification)')

    backfill_post = commands.add_parser(
        'backfill-post',
        help='Stamp post_links.post_id for every code minted '
             'under --minted-session. Idempotent.',
    )
    backfill_post.add_argument('--minted-session', required=True)
    backfill_post.add_argument('--post-id', type=int, required=True)

    backfill_reply = commands.add_parser(
        'backfill-reply',
        help='Stamp post_links.reply_id for every code minted '
             'under --minted-session. Idempotent.',
    )
    backfill_reply.add_argument('--minted-session', required=True)
    backfill_reply.add_argument('--reply-id', type=int, required=True)

    args = parser.parse_args()

    # Subcommand name -> handler; every handler takes the parsed namespace.
    handlers = {
        'mint': cmd_mint,
        'resolve': cmd_resolve,
        'wrap-text': cmd_wrap_text,
        'wrap-post-text': cmd_wrap_post_text,
        'utm-text': cmd_utm_text,
        'backfill-post': cmd_backfill_post,
        'backfill-reply': cmd_backfill_reply,
    }
    handler = handlers.get(args.cmd)
    if handler is not None:
        handler(args)
1081
+
1082
+
1083
# Run the CLI only when this file is executed as a script; importing it as a
# library must not trigger argument parsing.
if __name__ == '__main__':
    main()