@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,1084 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Per-DM short link minting + resolution for outbound link tracking.
|
|
3
|
+
|
|
4
|
+
All outbound URLs in the DM-replies pipeline get wrapped through this tool so
|
|
5
|
+
clicks attribute to the originating DM. Booking links, GitHub repos, our own
|
|
6
|
+
website pages, third-party references — every URL we send goes through /r/<code>.
|
|
7
|
+
|
|
8
|
+
Subcommands:
|
|
9
|
+
|
|
10
|
+
mint --dm-id N --target-url URL
|
|
11
|
+
Idempotent on (dm_id, target_url). Returns a wrapped URL like
|
|
12
|
+
https://<target_project_website>/r/<code>. Refuses if URL points at a
|
|
13
|
+
project not in dms.target_projects[]; the caller must call
|
|
14
|
+
`dm_conversation.py set-target-project --append --project NAME` first.
|
|
15
|
+
Auto-stamps dms.booking_link_sent_at for kind='booking'.
|
|
16
|
+
|
|
17
|
+
resolve --code CODE
|
|
18
|
+
Used by the public /api/short-links/<code> endpoint. Bumps clicks,
|
|
19
|
+
stamps first/last click timestamps, inserts a synthetic [CLICK_SIGNAL]
|
|
20
|
+
row in dm_messages so the engage pipeline picks the thread up. Returns
|
|
21
|
+
target_url + dm_id + project + platform.
|
|
22
|
+
|
|
23
|
+
wrap-text --dm-id N --text "..."
|
|
24
|
+
Find every URL in the text, mint each via the same path, substring-replace
|
|
25
|
+
the original URLs with the wrapped versions. Prints the wrapped text on
|
|
26
|
+
stdout. Used by reddit_browser.py / twitter_browser.py (via direct import
|
|
27
|
+
of `wrap_text()`) and by the LinkedIn shell flow (subprocess).
|
|
28
|
+
|
|
29
|
+
The classifier maps a URL to (kind, matched_project_name) using config.json:
|
|
30
|
+
- booking : URL starts with project.booking_link
|
|
31
|
+
- github : URL starts with project.github or matches project.landing_pages.github_repo
|
|
32
|
+
- website : URL host == project.website host, or a subdomain of it
|
|
33
|
+
- other : no project match (no project guard, kind='other')
|
|
34
|
+
|
|
35
|
+
Wrapped hostname is always the DM's primary `target_project.website` (consistent
|
|
36
|
+
per thread regardless of which project a given link points at).
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import argparse
|
|
42
|
+
import json
|
|
43
|
+
import os
|
|
44
|
+
import re
|
|
45
|
+
import secrets
|
|
46
|
+
import sys
|
|
47
|
+
import uuid
|
|
48
|
+
from urllib.parse import urlencode, urlsplit, urlunsplit, parse_qsl
|
|
49
|
+
|
|
50
|
+
# Repository root: two directory levels above this file (<repo>/scripts/<this file>).
REPO_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Put <repo>/scripts at the front of the import path so sibling modules resolve
# no matter what the caller's working directory is.
sys.path.insert(0, os.path.join(REPO_DIR, 'scripts'))

# HTTP-only: this module routes every read/write through the s4l.ai HTTP API
# (scripts/http_api.py). The direct-Postgres lane was removed 2026-06-01; there
# is no `import db` / get_conn() path any more, not as primary, not as fallback.

# Project definitions (websites, booking links, GitHub URLs, short-link flags).
CONFIG_PATH = os.path.join(REPO_DIR, 'config.json')
# Characters used for generated short codes: lowercase letters and digits with
# 'l', 'o', '0' and '1' left out (presumably to avoid look-alike characters).
CODE_ALPHABET = 'abcdefghijkmnpqrstuvwxyz23456789'
# Default number of characters in a generated short code.
CODE_LEN = 8

# Default wrapper host used when a project's own /r/<code> redirector is NOT
# live (config.json short_links_live=false) and the operator hasn't set an
# explicit short_links_host. s4l.ai's resolver lives at
# @m13v/seo-components -> app.s4l.ai/api/short-links/<code> and is the
# social-autoposter-owned fallback. Routing through it keeps first-party click
# logging in post_link_clicks instead of dropping to UTM-only.
DEFAULT_FALLBACK_HOST = 'https://s4l.ai'
|
|
68
|
+
|
|
69
|
+
# Match http(s) URLs AND bare-domain references with a path. The bare-domain
|
|
70
|
+
# branch requires at least one path character so we don't match prose like
|
|
71
|
+
# "i.e." or "S.F." or version numbers. Greedy on the path; trailing punctuation
|
|
72
|
+
# is stripped by the caller. Both branches are normalized through
|
|
73
|
+
# _ensure_scheme() before classification.
|
|
74
|
+
#
|
|
75
|
+
# Third branch (added 2026-05-10): bare project hostnames with NO path. Built
|
|
76
|
+
# dynamically from config.json project websites + booking_link + github hosts.
|
|
77
|
+
# A 7d audit found 47/2094 Reddit DMs and 7/319 X DMs mention a project URL,
|
|
78
|
+
# but ZERO short links got minted because the model casually drops domains
|
|
79
|
+
# like "fazm.ai is the link" or "main one is fazm, ai agent for macos,
|
|
80
|
+
# github.com/m13v/fazm" without https:// or trailing path. Branches 1 and 2
|
|
81
|
+
# both miss those, so we never wrap them. The new branch matches a known
|
|
82
|
+
# project host as a bare token, with a negative lookahead so it doesn't
|
|
83
|
+
# overlap with branch 2 ('fazm.ai/path' still goes through branch 2).
|
|
84
|
+
def _build_project_bare_host_pattern():
    """Build the regex fragment that matches known project hosts written bare.

    Reads config.json and collects the host of every project's ``website``,
    ``booking_link`` and ``github`` value, lower-cased, with any port and a
    literal leading ``www.`` removed. The hosts are joined into an alternation,
    longest first.

    This runs at import time, so it is strictly best-effort: an unreadable or
    malformed config, a non-dict project entry, or a non-string field value is
    skipped rather than allowed to break the import.

    Returns:
        The regex source string, or None when no usable host was found.
    """
    try:
        with open(CONFIG_PATH, 'r') as f:
            cfg = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file, invalid JSON, or undecodable bytes.
        return None
    projs = cfg.get('projects') if isinstance(cfg, dict) else None
    if not isinstance(projs, list):
        return None

    hosts = set()
    for p in projs:
        if not isinstance(p, dict):
            # Malformed entry; previously this raised AttributeError at import.
            continue
        for field in ('website', 'booking_link', 'github'):
            v = p.get(field)
            if not isinstance(v, str):
                continue
            v = v.strip()
            if not v:
                continue
            try:
                netloc = urlsplit(v if '://' in v else 'https://' + v).netloc
            except ValueError:
                continue
            host = (netloc or '').lower().split(':', 1)[0]
            # Strip a literal 'www.' prefix only (lstrip would chew chars).
            if host.startswith('www.'):
                host = host[4:]
            if host and '.' in host:
                hosts.add(host)
    if not hosts:
        return None

    # Longest first so a longer host wins over a shorter one it starts with;
    # the secondary sort key keeps the pattern identical from run to run.
    parts = sorted((re.escape(h) for h in hosts), key=lambda s: (-len(s), s))
    # \b on left, narrow lookahead on right. Reject:
    #   - word chars/slashes (mid-token or path → branch 2 territory)
    #   - dot+letter (sub-domain extension: 'runner.now.example.com' must NOT
    #     match 'runner.now')
    # ALLOW dot+non-letter (sentence-ending: 'try fazm.ai.' must match) and
    # plain punctuation/whitespace. Pre-2026-05-14 this was `(?![\w./])` which
    # over-rejected sentence-ending periods, so 'try fazm.ai.' yielded ZERO
    # matches and the URL went out bare.
    return r'\b(?:' + '|'.join(parts) + r')\b(?![\w/]|\.[a-z])'
|
|
120
|
+
|
|
121
|
+
# Regex source for bare project hosts, or None when config.json yields none.
_PROJECT_BARE_HOST_PAT = _build_project_bare_host_pattern()
# URL matcher used to find links in outgoing text. Alternation order matters:
#   1. explicit http(s):// URLs
#   2. bare domains followed by a '/' path
#   3. (only when available) known project hosts with no path at all
_URL_RE = re.compile(
    (
        r'https?://[^\s<>"\']+'
        r'|'
        r'(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z]{2,}/[^\s<>"\']*'
        + (r'|' + _PROJECT_BARE_HOST_PAT if _PROJECT_BARE_HOST_PAT else '')
    ),
    re.IGNORECASE,
)
# Characters stripped from the right end of a match, because the greedy path
# portion of _URL_RE also swallows sentence punctuation and closing brackets.
_TRAILING_PUNCT = '.,;:!?)]}>\'"'
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _ensure_scheme(url: str) -> str:
|
|
135
|
+
"""Prepend https:// to bare-domain URLs so urlsplit and downstream consumers
|
|
136
|
+
have a fully qualified URL. https? matches first branch of _URL_RE; the
|
|
137
|
+
bare-domain branch (everything after the alternation) lacks a scheme."""
|
|
138
|
+
if url.startswith(('http://', 'https://')):
|
|
139
|
+
return url
|
|
140
|
+
return 'https://' + url
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _load_projects():
    """Return the project entries from config.json that have a truthy ``name``.

    A missing or null ``projects`` key yields an empty list. Errors opening or
    parsing the file propagate to the caller.
    """
    with open(CONFIG_PATH, 'r') as f:
        cfg = json.load(f)
    # `or []` also covers an explicit `"projects": null`, which
    # .get('projects', []) would hand back as None and crash the iteration.
    return [p for p in (cfg.get('projects') or []) if p.get('name')]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _gen_code(n=CODE_LEN):
    """Return a random code of *n* characters drawn from CODE_ALPHABET.

    Each character is picked independently with ``secrets.choice``.
    """
    picks = [secrets.choice(CODE_ALPHABET) for _ in range(n)]
    return ''.join(picks)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _norm_host(url: str) -> str:
|
|
153
|
+
try:
|
|
154
|
+
return (urlsplit(url).netloc or '').lower().lstrip('www.')
|
|
155
|
+
except Exception:
|
|
156
|
+
return ''
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _classify_url(url: str, projects: list) -> tuple[str, str | None]:
    """Return (kind, project_name|None). Longest-prefix-wins across projects.

    Priority: booking > github > website > other. Ties within a kind go to the
    longest matching prefix so e.g. cal.com/team/mediar/fazm beats a hypothetical
    cal.com/team/mediar/ root. Bare-domain inputs are normalized to https:// first.

    Args:
        url: The URL to classify; surrounding whitespace is ignored.
        projects: Project dicts from config.json. Entries without a ``name``
            are skipped.

    Returns:
        ``('booking' | 'github' | 'website', project_name)`` for a match, or
        ``('other', None)`` when no project matches.
    """
    u = _ensure_scheme(url.strip())
    # The URL's host does not depend on the project, so compute it once
    # instead of once per project with a website.
    url_host = _norm_host(u)
    best_booking = ('', None)
    best_github = ('', None)
    best_website = ('', None)

    # NOTE(review): the booking/github checks are plain string-prefix tests
    # with no path-boundary check, so '.../fazm-web' also matches a configured
    # '.../fazm' when no longer prefix is configured — confirm this is intended.
    for p in projects:
        name = p.get('name')
        if not name:
            continue

        booking = (p.get('booking_link') or '').strip()
        if booking and u.startswith(booking.rstrip('?').rstrip('/')):
            if len(booking) > len(best_booking[0]):
                best_booking = (booking, name)

        gh = (p.get('github') or '').strip()
        if gh and u.startswith(gh.rstrip('/')):
            if len(gh) > len(best_github[0]):
                best_github = (gh, name)

        # A repo slug under landing_pages is a second way to declare the
        # project's GitHub URL.
        gh_repo = (p.get('landing_pages', {}) or {}).get('github_repo')
        if gh_repo:
            gh_url = f'https://github.com/{gh_repo.strip("/")}'
            if u.startswith(gh_url):
                if len(gh_url) > len(best_github[0]):
                    best_github = (gh_url, name)

        website = (p.get('website') or '').strip()
        if website and url_host:
            site_host = _norm_host(website)
            # Exact host match, or the URL is on a subdomain of the site.
            if site_host and (url_host == site_host or url_host.endswith('.' + site_host)):
                if len(site_host) > len(best_website[0]):
                    best_website = (site_host, name)

    if best_booking[1]:
        return ('booking', best_booking[1])
    if best_github[1]:
        return ('github', best_github[1])
    if best_website[1]:
        return ('website', best_website[1])
    return ('other', None)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _build_target_url(target_url: str, kind: str, *, dm_id: int, project: str | None, platform: str) -> str:
|
|
211
|
+
"""Add UTM params for kinds where we control the analytics consumer.
|
|
212
|
+
|
|
213
|
+
Canonical UTM scheme (matches _build_target_url_for_post + the pool
|
|
214
|
+
minters): utm_source='s4l' identifies the agency for every customer's
|
|
215
|
+
analytics ('this traffic came from S4L'). utm_term carries the platform
|
|
216
|
+
(reddit | twitter | linkedin | github_issues) since utm_source is no
|
|
217
|
+
longer platform-specific. utm_medium stays 'dm' to keep the DM rail
|
|
218
|
+
distinct from posts. utm_content keeps the strict 'dm_<id>' shape
|
|
219
|
+
consumed by bin/server.js (regex /^dm_(\\d+)$/) and project_stats_json
|
|
220
|
+
(LIKE 'dm_%').
|
|
221
|
+
|
|
222
|
+
Booking: Cal.com metadata[utm_*] survives to the booking webhook (the flat
|
|
223
|
+
utm_* gets stripped by Cal's UI), Calendly accepts both — keep both.
|
|
224
|
+
Website: our own domains run PostHog; flat utm_* is enough.
|
|
225
|
+
Github / other: leave the URL untouched (no downstream UTM consumer).
|
|
226
|
+
"""
|
|
227
|
+
if kind not in ('booking', 'website'):
|
|
228
|
+
return target_url
|
|
229
|
+
|
|
230
|
+
parts = urlsplit(target_url)
|
|
231
|
+
existing = dict(parse_qsl(parts.query, keep_blank_values=True))
|
|
232
|
+
|
|
233
|
+
utm = {
|
|
234
|
+
'utm_source': 's4l',
|
|
235
|
+
'utm_medium': 'dm',
|
|
236
|
+
'utm_campaign': (project or 'unknown').lower(),
|
|
237
|
+
'utm_term': (platform or 'unknown').lower(),
|
|
238
|
+
'utm_content': f'dm_{dm_id}',
|
|
239
|
+
}
|
|
240
|
+
for k, v in utm.items():
|
|
241
|
+
existing.setdefault(k, v)
|
|
242
|
+
if kind == 'booking':
|
|
243
|
+
existing[f'metadata[{k}]'] = v
|
|
244
|
+
|
|
245
|
+
new_query = urlencode(existing, doseq=True)
|
|
246
|
+
return urlunsplit((parts.scheme, parts.netloc, parts.path, new_query, parts.fragment))
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _build_target_url_for_post(target_url: str, kind: str, *, minted_session: str,
|
|
250
|
+
project: str | None, platform: str) -> str:
|
|
251
|
+
"""UTM stamping for PUBLIC post wrappers (utm_medium='post').
|
|
252
|
+
|
|
253
|
+
See _build_target_url for the canonical UTM scheme rationale. utm_content
|
|
254
|
+
keeps the 'post_<session>' shape so backfill_real_clicks.py can
|
|
255
|
+
PostHog-join on it.
|
|
256
|
+
"""
|
|
257
|
+
if kind not in ('booking', 'website'):
|
|
258
|
+
return target_url
|
|
259
|
+
|
|
260
|
+
parts = urlsplit(target_url)
|
|
261
|
+
existing = dict(parse_qsl(parts.query, keep_blank_values=True))
|
|
262
|
+
|
|
263
|
+
utm = {
|
|
264
|
+
'utm_source': 's4l',
|
|
265
|
+
'utm_medium': 'post',
|
|
266
|
+
'utm_campaign': (project or 'unknown').lower(),
|
|
267
|
+
'utm_term': (platform or 'unknown').lower(),
|
|
268
|
+
'utm_content': f'post_{minted_session}',
|
|
269
|
+
}
|
|
270
|
+
for k, v in utm.items():
|
|
271
|
+
existing.setdefault(k, v)
|
|
272
|
+
if kind == 'booking':
|
|
273
|
+
existing[f'metadata[{k}]'] = v
|
|
274
|
+
|
|
275
|
+
new_query = urlencode(existing, doseq=True)
|
|
276
|
+
return urlunsplit((parts.scheme, parts.netloc, parts.path, new_query, parts.fragment))
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _project_website(projects: list, name: str) -> str | None:
|
|
280
|
+
for p in projects:
|
|
281
|
+
if p.get('name') == name:
|
|
282
|
+
site = (p.get('website') or '').strip().rstrip('/')
|
|
283
|
+
return site or None
|
|
284
|
+
return None
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _project_short_links_live(projects: list, name: str) -> bool:
|
|
288
|
+
"""True iff the project's OWN domain serves /r/<code>.
|
|
289
|
+
|
|
290
|
+
Default true (preserves behavior for fazm, mediar, assrt, cyrano-systems
|
|
291
|
+
and every other existing project where the customer's domain hosts the
|
|
292
|
+
@m13v/seo-components /r/[code] handler).
|
|
293
|
+
|
|
294
|
+
Set false in config.json for projects where the customer owns the domain
|
|
295
|
+
but hasn't shipped the resolver (or the static CSV) yet. In that case the
|
|
296
|
+
wrapper auto-routes through DEFAULT_FALLBACK_HOST (s4l.ai), so mints still
|
|
297
|
+
produce a live /r/<code> with first-party click logging; we no longer drop
|
|
298
|
+
to UTM-only. See _project_short_links_host for the host-resolution order.
|
|
299
|
+
|
|
300
|
+
An explicit `short_links_host` in config.json (regardless of this flag)
|
|
301
|
+
always wins and is used verbatim.
|
|
302
|
+
"""
|
|
303
|
+
for p in projects:
|
|
304
|
+
if p.get('name') == name:
|
|
305
|
+
v = p.get('short_links_live')
|
|
306
|
+
return True if v is None else bool(v)
|
|
307
|
+
return True
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def _project_short_links_host(projects: list, name: str) -> str | None:
|
|
311
|
+
"""Resolve the wrapper host where /r/<code> is served for this project.
|
|
312
|
+
|
|
313
|
+
Resolution order (first match wins):
|
|
314
|
+
1. Explicit `short_links_host` in config.json (e.g. "https://s4l.ai").
|
|
315
|
+
Used to pin a project to a specific resolver-bearing host we operate.
|
|
316
|
+
2. DEFAULT_FALLBACK_HOST (= https://s4l.ai) when `short_links_live` is
|
|
317
|
+
explicitly false. Auto-applied so any project flagged as "customer
|
|
318
|
+
hasn't deployed the resolver yet" still gets a live /r/<code> through
|
|
319
|
+
the social-autoposter-owned resolver, instead of dropping to UTM-only.
|
|
320
|
+
3. None → caller falls back to project.website (the legacy/default path,
|
|
321
|
+
used when short_links_live is unset/true, meaning the customer's own
|
|
322
|
+
domain has the @m13v/seo-components /r/[code] handler shipped).
|
|
323
|
+
|
|
324
|
+
Callers should always do: `_project_short_links_host(p, name) or website`.
|
|
325
|
+
|
|
326
|
+
The underlying target_url (where the resolver 302s) is unchanged in either
|
|
327
|
+
case — it still points at the customer's site with full UTMs baked in at
|
|
328
|
+
mint time. Only the wrapper host changes.
|
|
329
|
+
"""
|
|
330
|
+
for p in projects:
|
|
331
|
+
if p.get('name') == name:
|
|
332
|
+
host = (p.get('short_links_host') or '').strip().rstrip('/')
|
|
333
|
+
if host:
|
|
334
|
+
return host
|
|
335
|
+
if p.get('short_links_live') is False:
|
|
336
|
+
return DEFAULT_FALLBACK_HOST
|
|
337
|
+
return None
|
|
338
|
+
return None
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def utm_only_text(*, text: str, platform: str, project_name: str) -> str:
    """Return ``text`` with every URL swapped for its UTM-tagged form.

    Nothing is minted and no API is called. Intended as the safety net for
    callers' exception branches so that a bare URL is never posted when
    ``wrap_text_for_post`` itself raises.

    Falsy ``text`` is returned unchanged. ``platform`` is lower-cased and
    ``'x'`` is treated as ``'twitter'``. URLs that already look like a
    wrapped ``/r/<code>`` link are left untouched. Trailing punctuation
    matched as part of a URL is kept outside the rewritten URL.
    """
    if not text:
        return text

    platform = (platform or '').lower()
    platform = 'twitter' if platform == 'x' else platform

    minted_session = str(uuid.uuid4())
    projects = _load_projects()
    rewritten: dict[str, str] = {}  # bare URL -> replacement, so repeats map once

    def _tagged(url: str) -> str:
        # Already-wrapped short link: pass through verbatim.
        if re.search(r'/r/[a-z0-9]{4,32}(?:[/?#]|$)', url, re.IGNORECASE):
            return url
        target = _ensure_scheme(url)
        kind, matched_project = _classify_url(target, projects)
        return _build_target_url_for_post(
            target, kind, minted_session=minted_session,
            project=matched_project or project_name, platform=platform,
        )

    def _sub(match):
        raw = match.group(0)
        bare = raw.rstrip(_TRAILING_PUNCT)
        if bare not in rewritten:
            rewritten[bare] = _tagged(bare)
        # Re-attach whatever punctuation was stripped off the match.
        return rewritten[bare] + raw[len(bare):]

    return _URL_RE.sub(_sub, text)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _dm_row(dm_id: int):
    """Load the DM header for ``dm_id`` through the HTTP API.

    Calls ``api_get("/api/v1/dms/<id>", ok_on_404=True)`` and returns the
    ``data.dm`` object from the response.

    Raises:
        SystemExit: with ``"DM #<id> not found"`` when the response is
            empty, is not ``ok``, or carries no ``dm`` payload.
    """
    # http_api lives next to this file; make it importable.
    here = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, here)
    from http_api import api_get

    resp = api_get(f"/api/v1/dms/{dm_id}", ok_on_404=True)
    dm = None
    if resp and resp.get('ok'):
        dm = (resp.get('data') or {}).get('dm')
    if not dm:
        raise SystemExit(f"DM #{dm_id} not found")
    return dm
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def _mint_one(*, dm_id: int, target_url: str, projects: list, projects_by_name: dict,
              dm: dict) -> dict:
    """Mint (or reuse) one DM short link; shared by `mint` CLI and `wrap_text`.

    The URL is classified and its final target built locally; the
    insert-or-reuse itself is done by POST /api/v1/dm-links/mint. Up to
    eight random codes are tried when the server reports `code_collision`.

    `projects_by_name` is accepted for interface compatibility and is not
    read here.

    Returns one of:
      {ok: True, code, short_url, target_url, kind, project, reused: bool}
      {ok: False, error: "empty_url"}
      {ok: False, error: "target_project_required", needed_project, url, kind}
      {ok: False, error: "no_primary_website", dm_id, detail}
      {ok: False, error: "mint_api_unreachable", detail}
      {ok: False, error: <server error code> or "mint_api_error"}
      {ok: False, error: "code_collision_after_8_tries"}
    """
    target_url = _ensure_scheme((target_url or '').strip())
    if not target_url or target_url == 'https://':
        return {'ok': False, 'error': 'empty_url'}

    platform = (dm.get('platform') or 'reddit').lower()
    platform = 'twitter' if platform == 'x' else platform

    kind, matched_project = _classify_url(target_url, projects)

    # Target-project guard: a URL that maps to one of our projects may only be
    # minted when that project is already listed in the DM's target_projects.
    # URLs that match no project skip the guard.
    allowed_projects = dm.get('target_projects') or []
    if matched_project and matched_project not in allowed_projects:
        return {
            'ok': False,
            'error': 'target_project_required',
            'needed_project': matched_project,
            'url': target_url,
            'kind': kind,
        }

    # Wrapper host: the DM's primary target_project, else the project the URL
    # matched. A project-level short-links host overrides the project website.
    primary = dm.get('target_project') or matched_project or None
    website = _project_website(projects, primary) if primary else None
    if not website:
        return {
            'ok': False,
            'error': 'no_primary_website',
            'dm_id': dm_id,
            'detail': f"no website for project={primary!r}; set target_project first",
        }
    # `primary` is necessarily truthy here (otherwise `website` would be None).
    wrapper_host = _project_short_links_host(projects, primary) or website

    final_target = _build_target_url(
        target_url,
        kind,
        dm_id=dm_id,
        project=matched_project,
        platform=platform,
    )

    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    # Everything in the request except the random code is loop-invariant.
    fixed_fields = {
        "target_url": final_target,
        "bare_url": target_url if final_target != target_url else None,
        "kind": kind,
        "project_at_mint": matched_project,
        "stamp_booking": bool(kind == 'booking' and not dm.get('booking_link_sent_at')),
    }

    for _attempt in range(8):
        code = _gen_code()
        try:
            resp = api_post(
                "/api/v1/dm-links/mint",
                {"dm_id": dm_id, "code": code, **fixed_fields},
                ok_on_conflict=True,
            )
        except Exception as exc:
            return {'ok': False, 'error': 'mint_api_unreachable', 'detail': str(exc)}

        if resp and resp.get('ok'):
            data = resp.get('data') or {}
            # The server may hand back an existing code (reuse); prefer it.
            ret_code = data.get('code') or code
            return {
                'ok': True,
                'code': ret_code,
                'short_url': f"{wrapper_host}/r/{ret_code}",
                'target_url': final_target,
                'kind': data.get('kind') or kind,
                'project': matched_project,
                'reused': bool(data.get('reused')),
            }

        err = (resp or {}).get('error') or {}
        err_code = err.get('code') if isinstance(err, dict) else None
        if err_code != 'code_collision':
            return {'ok': False, 'error': err_code or 'mint_api_error'}
        # code_collision: loop around and try another random code.

    return {'ok': False, 'error': 'code_collision_after_8_tries'}
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
# ---- Library entry point used by reddit_browser.py / twitter_browser.py ----
|
|
503
|
+
|
|
504
|
+
def wrap_text(*, dm_id: int, text: str) -> dict:
    """Mint a short link for every URL in `text` and splice the links in.

    Returns:
      {ok: True, text: "<wrapped>", minted_codes: [...], skipped: [...]}
      {ok: False, error: "...", ...}   # the failing _mint_one result

    The first URL that fails to mint aborts the whole call; nothing is
    silently passed through. On `target_project_required` the caller is
    expected to add `needed_project` to the DM's target projects and retry.
    Falsy `text` short-circuits before any project or DM lookup.
    """
    if not text:
        return {'ok': True, 'text': text, 'minted_codes': [], 'skipped': []}

    projects = _load_projects()
    projects_by_name = {p['name']: p for p in projects}
    dm = _dm_row(dm_id)
    replacements = {}  # bare URL -> wrapped URL (identical URLs are minted once)
    minted_codes = []
    skipped = []

    # Pass 1: decide the replacement for each distinct URL. Trailing prose
    # punctuation ("...github.com/foo.") is not part of the URL.
    for match in _URL_RE.finditer(text):
        url = match.group(0).rstrip(_TRAILING_PUNCT)
        if url in replacements:
            continue

        # Already shaped like a wrapped /r/<code> link: leave it alone.
        if re.search(r'/r/[a-z0-9]{4,32}(?:[/?#]|$)', url, re.IGNORECASE):
            replacements[url] = url
            skipped.append({'url': url, 'reason': 'already_wrapped'})
            continue

        res = _mint_one(
            dm_id=dm_id,
            target_url=url,
            projects=projects,
            projects_by_name=projects_by_name,
            dm=dm,
        )
        if not res.get('ok'):
            return {**res, 'ok': False}

        replacements[url] = res['short_url']
        # Fresh codes are always reported; reused ones are reported too when
        # present, so callers can backfill message_id.
        if not res.get('reused') or res.get('code'):
            minted_codes.append(res['code'])

    if not replacements:
        return {'ok': True, 'text': text, 'minted_codes': [], 'skipped': skipped}

    # Pass 2: substitute, putting stripped punctuation back after each URL.
    def _sub(match):
        raw = match.group(0)
        url = raw.rstrip(_TRAILING_PUNCT)
        return replacements.get(url, url) + raw[len(url):]

    return {
        'ok': True,
        'text': _URL_RE.sub(_sub, text),
        'minted_codes': minted_codes,
        'skipped': skipped,
    }
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
# ---- Post-link library (parallel rail to DM, table=post_links) ----------
|
|
579
|
+
|
|
580
|
+
def _mint_one_post(*, target_url: str, projects: list, platform: str,
                   project_name: str, minted_session: str) -> dict:
    """Mint one short link for a public post or comment.

    Mirrors _mint_one but targets the post-links endpoints. The row is
    created without a post/reply id; the caller backfills one of them later
    using `minted_session`.

    Returns:
      {ok: True, code, short_url, target_url, kind}                  fresh mint
      {ok: True, code, short_url, target_url, kind: 'website',
       from_pool: True}                                              pool claim
      {ok: True, code: None, short_url, target_url, kind,
       utm_only: True, fallback_reason}                              UTM fallback
      {ok: False, error: 'empty_url'}
      {ok: False, error: 'no_primary_website', project, detail}

    UTM fallback (`short_url` is the UTM-tagged target itself, no /r/<code>)
    is used when the project sets `force_utm_only` (reason 'policy'), when
    the API call raises ('api_unreachable'), when the server reports an
    error (its error code, else 'pool_exhausted' / 'mint_api_error'), when a
    pool claim succeeds but carries no code ('pool_claim_missing_code'), or
    after eight consecutive code collisions ('code_collision_after_8_tries').

    External-short-links path: when the project's config entry has
    `external_short_links` set, no fresh code is minted; one is claimed via
    POST /api/v1/post-links/claim-pool, and the returned `target_url` is the
    one fixed on the pool row, not the URL passed in.

    `platform` is lower-cased and 'x' is mapped to 'twitter' before it is
    used anywhere (UTM target, pool claim, mint request), so direct callers
    get the same normalization as callers that pre-normalize.
    """
    target_url = _ensure_scheme((target_url or '').strip())
    if not target_url or target_url == 'https://':
        return {'ok': False, 'error': 'empty_url'}

    kind, matched_project = _classify_url(target_url, projects)

    # The wrapper host comes from the project we are posting AS, not from URL
    # classification. A project-level short-links host overrides the website;
    # only the wrapper changes, never the redirect target.
    website = _project_website(projects, project_name)
    if not website:
        return {
            'ok': False,
            'error': 'no_primary_website',
            'project': project_name,
            'detail': f"no website for project={project_name!r} in config.json",
        }
    wrapper_host = _project_short_links_host(projects, project_name) or website

    platform_norm = (platform or '').lower()
    if platform_norm == 'x':
        platform_norm = 'twitter'

    project_cfg = next((p for p in projects if p.get('name') == project_name), None)

    # UTM-tagged target. It is both what a minted code redirects to and the
    # last-resort link when no /r/<code> can be produced. No row is created
    # in fallback mode.
    fallback_target = _build_target_url_for_post(
        target_url,
        kind,
        minted_session=minted_session,
        project=matched_project or project_name,
        platform=platform_norm,
    )

    def _utm_fallback(reason: str) -> dict:
        return {
            'ok': True,
            'code': None,
            'short_url': fallback_target,
            'target_url': fallback_target,
            'kind': kind,
            'utm_only': True,
            'fallback_reason': reason,
        }

    # Opt-in policy override: `force_utm_only: true` in the project config
    # deliberately posts the UTM-tagged URL instead of minting a short link.
    if project_cfg and project_cfg.get('force_utm_only'):
        return _utm_fallback('policy')

    # http_api lives next to this file; make it importable.
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    if project_cfg and project_cfg.get('external_short_links'):
        # Pool path: claim a pre-minted code server-side.
        try:
            resp = api_post(
                "/api/v1/post-links/claim-pool",
                {
                    "project_name": project_name,
                    "platform": platform_norm,
                    "minted_session": minted_session,
                },
                ok_on_conflict=True,
            )
        except Exception:
            return _utm_fallback('api_unreachable')
        if not resp or not resp.get('ok'):
            err = (resp or {}).get('error') or {}
            err_code = err.get('code') if isinstance(err, dict) else None
            return _utm_fallback(err_code or 'pool_exhausted')
        data = resp.get('data') or {}
        pool_code = data.get('code')
        if not pool_code:
            # An "ok" claim without a code would produce a dead
            # "<host>/r/None" link; the UTM URL is the safer outcome.
            return _utm_fallback('pool_claim_missing_code')
        return {
            'ok': True,
            'code': pool_code,
            'short_url': f"{wrapper_host}/r/{pool_code}",
            'target_url': data.get('target_url'),
            'kind': 'website',
            'from_pool': True,
        }

    # Fresh mint: try up to 8 random codes before giving up on collisions.
    for _attempt in range(8):
        code = _gen_code()
        try:
            resp = api_post(
                "/api/v1/post-links/mint",
                {
                    "code": code,
                    "platform": platform_norm,
                    "project_name": project_name,
                    "target_url": fallback_target,
                    "kind": kind,
                    "project_at_mint": matched_project,
                    "minted_session": minted_session,
                },
                ok_on_conflict=True,
            )
        except Exception:
            return _utm_fallback('api_unreachable')
        if resp and resp.get('ok'):
            return {
                'ok': True,
                'code': code,
                'short_url': f"{wrapper_host}/r/{code}",
                'target_url': fallback_target,
                'kind': kind,
            }
        err = (resp or {}).get('error') or {}
        err_code = err.get('code') if isinstance(err, dict) else None
        if err_code == 'code_collision':
            continue  # try another random code
        return _utm_fallback(err_code or 'mint_api_error')
    return _utm_fallback('code_collision_after_8_tries')
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def wrap_text_for_post(*, text: str, platform: str, project_name: str) -> dict:
    """Wrap every URL in a public post/comment `text` via _mint_one_post.

    Returns:
      {ok: True, text: <wrapped>, minted_session, codes: [...], skipped: [...]}
      {ok: False, error: ..., ...}   # the failing _mint_one_post result

    `minted_session` is a fresh UUID shared by every code minted in this
    call; pass it to backfill_post_id / backfill_reply_id once the row id is
    known. It is None when `text` is falsy or contains no URLs.

    `platform` is lower-cased and 'x' becomes 'twitter'. URLs that came back
    as UTM-only (no code) are listed in `skipped` with reason
    'utm_fallback' rather than putting None into `codes`.
    """
    if not text:
        return {'ok': True, 'text': text, 'minted_session': None,
                'codes': [], 'skipped': []}

    platform = (platform or '').lower()
    platform = 'twitter' if platform == 'x' else platform

    minted_session = str(uuid.uuid4())
    projects = _load_projects()
    replacements = {}  # bare URL -> replacement (identical URLs handled once)
    codes = []
    skipped = []

    for match in _URL_RE.finditer(text):
        url = match.group(0).rstrip(_TRAILING_PUNCT)
        if url in replacements:
            continue

        # Already shaped like a wrapped /r/<code> link: leave it alone.
        if re.search(r'/r/[a-z0-9]{4,32}(?:[/?#]|$)', url, re.IGNORECASE):
            replacements[url] = url
            skipped.append({'url': url, 'reason': 'already_wrapped'})
            continue

        res = _mint_one_post(
            target_url=url,
            projects=projects,
            platform=platform,
            project_name=project_name,
            minted_session=minted_session,
        )
        if not res.get('ok'):
            return {**res, 'ok': False}

        replacements[url] = res['short_url']
        code = res.get('code')
        if code is None:
            # UTM-only result: surface how the URL was handled without
            # leaking a None into codes[].
            skipped.append({'url': url, 'reason': 'utm_fallback',
                            'detail': res.get('fallback_reason')})
        else:
            codes.append(res['code'])

    if not replacements:
        return {'ok': True, 'text': text, 'minted_session': None,
                'codes': [], 'skipped': skipped}

    def _sub(match):
        raw = match.group(0)
        url = raw.rstrip(_TRAILING_PUNCT)
        # Put back the punctuation that was stripped off the match.
        return replacements.get(url, url) + raw[len(url):]

    return {
        'ok': True,
        'text': _URL_RE.sub(_sub, text),
        'minted_session': minted_session,
        'codes': codes,
        'skipped': skipped,
    }
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
def _backfill_via_api(*, minted_session: str, post_id: int | None = None,
                      reply_id: int | None = None) -> int:
    """POST a backfill request and return how many rows the server updated.

    Sends `minted_session` plus whichever of `post_id` / `reply_id` is not
    None (coerced with int()) to /api/v1/post-links/backfill. Best-effort:
    an exception from the API call, an empty response, or a non-ok response
    all yield 0.
    """
    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    payload: dict = {"minted_session": minted_session}
    for key, value in (("post_id", post_id), ("reply_id", reply_id)):
        if value is not None:
            payload[key] = int(value)

    try:
        resp = api_post("/api/v1/post-links/backfill", payload)
    except Exception:
        return 0

    if resp and resp.get('ok'):
        return int((resp.get('data') or {}).get('updated') or 0)
    return 0
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
def backfill_post_id(*, minted_session: str, post_id: int) -> int:
    """Attach `post_id` to every link minted under `minted_session`.

    Returns the number of rows reported as updated by
    /api/v1/post-links/backfill. Returns 0 without calling the API when
    `minted_session` is falsy (a post with no URLs has no session) or
    `post_id` is None, so callers should not treat 0 as an error.
    """
    if minted_session and post_id is not None:
        return _backfill_via_api(minted_session=minted_session, post_id=post_id)
    return 0
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
def backfill_reply_id(*, minted_session: str, reply_id: int) -> int:
    """Attach `reply_id` to every link minted under `minted_session`.

    Counterpart of backfill_post_id for replies. Returns 0 without calling
    the API when `minted_session` is falsy or `reply_id` is None.
    """
    if minted_session and reply_id is not None:
        return _backfill_via_api(minted_session=minted_session, reply_id=reply_id)
    return 0
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
# ---- CLI subcommands ----
|
|
869
|
+
|
|
870
|
+
def cmd_mint(args):
    """CLI `mint`: mint (or reuse) one short link for `args.target_url`.

    On failure the result is written to stderr as JSON and the process exits
    with status 2. On success prints the full result as JSON when
    `args.json` is set, otherwise just the short URL.
    """
    projects = _load_projects()
    by_name = {p['name']: p for p in projects}
    dm = _dm_row(args.dm_id)

    res = _mint_one(
        dm_id=args.dm_id,
        target_url=args.target_url,
        projects=projects,
        projects_by_name=by_name,
        dm=dm,
    )
    if not res.get('ok'):
        sys.stderr.write(json.dumps(res) + '\n')
        sys.exit(2)

    print(json.dumps(res) if args.json else res['short_url'])
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
# Bot User-Agent regex. Matches Twitter card prefetch, LinkedIn unfurl,
|
|
891
|
+
# Slack/Discord/Telegram/WhatsApp link previews, generic Google/Bing crawlers,
|
|
892
|
+
# and Pinterest/Embedly/Snapchat. We discovered 97 percent of /r/<code> hits
|
|
893
|
+
# fired within 30 seconds of mint, average 17s, which is the link-preview
|
|
894
|
+
# fingerprint. Real human ratio cross-referenced against PostHog pageviews
|
|
895
|
+
# was 5-8 percent. When a UA matches:
|
|
896
|
+
# 1. Skip the legacy `clicks` counter increment (so post-2026-05-07 the
|
|
897
|
+
# legacy column is humans-only).
|
|
898
|
+
# 2. Skip the [CLICK_SIGNAL] insert into dm_messages so the engage pipeline
|
|
899
|
+
# isn't woken up by a Slackbot.
|
|
900
|
+
# 3. Still log a row in dm_link_clicks with is_bot=true so historical
|
|
901
|
+
# splits stay accurate.
|
|
902
|
+
# 4. Still return target_url so previews render.
|
|
903
|
+
import hashlib
|
|
904
|
+
import re
|
|
905
|
+
# Case-insensitive, unanchored alternation used with .search() on a
# User-Agent string: a hit on ANY listed token anywhere in the UA counts.
# NOTE(review): the leading generic `bot` alternative already matches every
# "...bot" name listed after it (Twitterbot, Slackbot, Googlebot, ...), and
# also any UA that merely contains the letters "bot" — confirm that breadth
# is intended before relying on the explicit names for precision.
BOT_UA_RE = re.compile(
    r'bot|crawler|spider|Twitterbot|LinkedInBot|Slackbot|facebookexternalhit'
    r'|Discordbot|TelegramBot|WhatsApp|Applebot|Googlebot|Bingbot|YandexBot'
    r'|DuckDuckBot|redditbot|Pinterest|Embedly|Snapchat',
    re.IGNORECASE,
)
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
def cmd_resolve(args):
    """CLI `resolve`: look up `args.code` and print the result as JSON.

    Bot detection (BOT_UA_RE against the user agent) and IP hashing (first
    16 hex chars of the SHA-256 of the IP) are done here; the lookup itself
    is POST /api/v1/dm-links/resolve.

    Prints `{"error": "not_found", "code": ...}` for an empty or non-ok
    response of any kind, otherwise the dm_id / platform / project / kind /
    target_url / is_bot fields from the response data.
    """
    def _opt(attr):
        # Optional CLI value: missing attribute or None both become ''.
        return (getattr(args, attr, '') or '').strip()

    ua = _opt('user_agent')
    referrer = _opt('referrer') or None
    ip_raw = _opt('ip')

    is_bot = bool(ua and BOT_UA_RE.search(ua))
    ip_hash = None
    if ip_raw:
        ip_hash = hashlib.sha256(ip_raw.encode('utf-8')).hexdigest()[:16]

    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
    from http_api import api_post

    payload = {
        "code": args.code,
        "no_count": bool(args.no_count),
        "is_bot": is_bot,
        "ip_hash": ip_hash,
        "user_agent": ua or None,
        "referrer": referrer,
    }
    resp = api_post("/api/v1/dm-links/resolve", payload, ok_on_404=True)

    if not resp or not resp.get('ok'):
        print(json.dumps({'error': 'not_found', 'code': args.code}))
        return

    data = resp.get('data') or {}
    result = {
        'dm_id': data.get('dm_id'),
        'platform': data.get('platform'),
        'project': data.get('project'),
        'kind': data.get('kind'),
        'target_url': data.get('target_url'),
        # Fall back to the local verdict when the response omits is_bot.
        'is_bot': data.get('is_bot', is_bot),
    }
    print(json.dumps(result))
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
def cmd_wrap_text(args):
    """CLI `wrap-text`: wrap every URL in `args.text` for DM `args.dm_id`.

    On failure writes the result to stderr as JSON and exits with status 2.
    With `args.json`, prints the full result as JSON. Otherwise stdout gets
    the wrapped text only (no trailing newline), and any minted codes or
    skipped URLs are reported on stderr as one JSON line.
    """
    res = wrap_text(dm_id=args.dm_id, text=args.text)
    if not res.get('ok'):
        sys.stderr.write(json.dumps(res) + '\n')
        sys.exit(2)

    if args.json:
        print(json.dumps(res))
        return

    # Keep stdout clean so it can be piped straight into a send command or
    # captured in a shell variable; diagnostics belong on stderr.
    has_diagnostics = res.get('minted_codes') or res.get('skipped')
    if has_diagnostics:
        report = {
            'minted_codes': res['minted_codes'],
            'skipped': res['skipped'],
        }
        sys.stderr.write(json.dumps(report) + '\n')
    sys.stdout.write(res['text'])
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
def cmd_wrap_post_text(args):
    """CLI `wrap-post-text`: wrap URLs in a public post/comment text.

    Always prints the full JSON result on success, because callers need
    `minted_session` for the later backfill. On failure writes the result
    to stderr as JSON and exits with status 2.
    """
    res = wrap_text_for_post(
        text=args.text,
        platform=args.platform,
        project_name=args.project,
    )
    succeeded = res.get('ok')
    if not succeeded:
        sys.stderr.write(json.dumps(res) + '\n')
        sys.exit(2)
    print(json.dumps(res))
|
|
982
|
+
|
|
983
|
+
|
|
984
|
+
def cmd_utm_text(args):
    """CLI wrapper around utm_only_text: UTM-tag URLs, no minting.

    Writes the rewritten text to stdout with no trailing newline. Meant for
    flows where the reply is typed by something other than the Python
    posting layer, so wrap_text_for_post cannot be invoked and the typed URL
    itself has to carry the attribution parameters.
    """
    tagged = utm_only_text(
        text=args.text,
        platform=args.platform,
        project_name=args.project,
    )
    sys.stdout.write(tagged)
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
def cmd_backfill_post(args):
    """CLI: stamp `args.post_id` onto the links of `args.minted_session`.

    Prints a JSON summary with the number of rows backfilled.
    """
    updated = backfill_post_id(minted_session=args.minted_session, post_id=args.post_id)
    summary = {
        'backfilled': updated,
        'post_id': args.post_id,
        'minted_session': args.minted_session,
    }
    print(json.dumps(summary))
|
|
1000
|
+
|
|
1001
|
+
|
|
1002
|
+
def cmd_backfill_reply(args):
    """CLI: stamp `args.reply_id` onto the links of `args.minted_session`.

    Prints a JSON summary with the number of rows backfilled.
    """
    updated = backfill_reply_id(minted_session=args.minted_session, reply_id=args.reply_id)
    summary = {
        'backfilled': updated,
        'reply_id': args.reply_id,
        'minted_session': args.minted_session,
    }
    print(json.dumps(summary))
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
def main(argv=None):
    """Parse the command line and run the selected subcommand.

    Args:
        argv: Optional list of argument strings, without the program name.
            When None (the default), argparse reads ``sys.argv[1:]``, so a
            plain ``main()`` call behaves as before. Passing a list allows
            the CLI to be driven programmatically.

    Raises:
        SystemExit: raised by argparse with status 2 when the subcommand is
            missing or the arguments are invalid, and with status 0 for
            ``--help``.
    """
    # Defined once so wrap-post-text and utm-text cannot drift apart.
    platforms = ['reddit', 'twitter', 'x', 'linkedin', 'github_issues', 'github', 'moltbook']

    ap = argparse.ArgumentParser(description=__doc__)
    sub = ap.add_subparsers(dest='cmd', required=True)

    p_mint = sub.add_parser('mint', help='Mint (or reuse) a wrapped /r/<code> short link for one URL')
    p_mint.add_argument('--dm-id', type=int, required=True)
    p_mint.add_argument('--target-url', required=True)
    p_mint.add_argument('--json', action='store_true', help='Print full JSON envelope')

    p_res = sub.add_parser('resolve', help='Look up code, increment clicks, return target URL')
    p_res.add_argument('--code', required=True)
    p_res.add_argument('--no-count', action='store_true', help='Skip click counter update (debugging)')
    # Bot detection inputs. When --user-agent matches the bot regex (Twitterbot,
    # LinkedInBot, Slackbot, facebookexternalhit, etc.), the legacy clicks
    # counter is NOT bumped, [CLICK_SIGNAL] is NOT inserted, but a row IS
    # appended to dm_link_clicks with is_bot=true so historical splits work.
    p_res.add_argument('--user-agent', default='', help='Caller User-Agent for bot detection')
    p_res.add_argument('--referrer', default='', help='Caller Referer header for analytics')
    p_res.add_argument('--ip', default='', help='Caller IP (sha256 hashed before storage)')

    p_wrap = sub.add_parser('wrap-text', help='Wrap every URL in TEXT through the mint pipeline')
    p_wrap.add_argument('--dm-id', type=int, required=True)
    p_wrap.add_argument('--text', required=True)
    p_wrap.add_argument('--json', action='store_true', help='Print full JSON envelope to stdout')

    p_wrap_post = sub.add_parser('wrap-post-text',
                                 help='Wrap URLs in a public post/comment text. '
                                      'Mints into post_links with NULL post_id; '
                                      'backfill via backfill-post or backfill-reply.')
    p_wrap_post.add_argument('--text', required=True)
    p_wrap_post.add_argument('--platform', required=True, choices=platforms)
    p_wrap_post.add_argument('--project', required=True,
                             help='project_name from config.json (drives wrapper hostname)')

    p_utm = sub.add_parser('utm-text',
                           help='UTM-only wrap (no DB write). Replaces every URL '
                                'in --text with its UTM-tagged version and prints '
                                'the result on stdout. Use when no Python posting '
                                'layer is available (Claude-driven MCP typing).')
    p_utm.add_argument('--text', required=True)
    p_utm.add_argument('--platform', required=True, choices=platforms)
    p_utm.add_argument('--project', required=True,
                       help='project_name from config.json (drives utm_campaign + wrapper hostname classification)')

    p_bp = sub.add_parser('backfill-post',
                          help='Stamp post_links.post_id for every code minted '
                               'under --minted-session. Idempotent.')
    p_bp.add_argument('--minted-session', required=True)
    p_bp.add_argument('--post-id', type=int, required=True)

    p_br = sub.add_parser('backfill-reply',
                          help='Stamp post_links.reply_id for every code minted '
                               'under --minted-session. Idempotent.')
    p_br.add_argument('--minted-session', required=True)
    p_br.add_argument('--reply-id', type=int, required=True)

    args = ap.parse_args(argv)

    # The subparser is required, so args.cmd is always one of these keys.
    handlers = {
        'mint': cmd_mint,
        'resolve': cmd_resolve,
        'wrap-text': cmd_wrap_text,
        'wrap-post-text': cmd_wrap_post_text,
        'utm-text': cmd_utm_text,
        'backfill-post': cmd_backfill_post,
        'backfill-reply': cmd_backfill_reply,
    }
    handlers[args.cmd](args)
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|