@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""LinkedIn URL helpers: ID extraction, canonicalization, dedup checks.
|
|
3
|
+
|
|
4
|
+
LinkedIn surfaces the same post under multiple URL shapes:
|
|
5
|
+
/feed/update/urn:li:activity:<19-digit-activity-id>/[?commentUrn=...]
|
|
6
|
+
/posts/<author-slug>_<keywords>-activity-<19-digit-id>-<5-char-suffix>
|
|
7
|
+
/posts/<author-slug>_<keywords>-share-<19-digit-id>-<5-char-suffix>
|
|
8
|
+
/posts/<author-slug>_<keywords>-ugcPost-<19-digit-id>-<5-char-suffix>
|
|
9
|
+
|
|
10
|
+
The activity URN, share URN, and ugcPost URN for the same logical post are
|
|
11
|
+
DIFFERENT numbers, so canonicalizing to one form by string transform is not
|
|
12
|
+
possible. The pragmatic fix: extract every 16-19 digit ID from a URL and
|
|
13
|
+
treat the SET of IDs as the post identity. Two URLs collide if any ID
|
|
14
|
+
overlaps. (Across our DB this matches because the comment-permalink
|
|
15
|
+
captured after posting always carries the activity URN, so day-2 logging
|
|
16
|
+
under /posts/...-share-<X>-... still has our_url=/feed/update/...activity:<Y>
|
|
17
|
+
where Y matches day-1's stored thread_url ID.)
|
|
18
|
+
|
|
19
|
+
CLI:
|
|
20
|
+
python3 scripts/linkedin_url.py --extract URL
|
|
21
|
+
python3 scripts/linkedin_url.py --canonicalize URL
|
|
22
|
+
python3 scripts/linkedin_url.py --check-engaged URL
|
|
23
|
+
Exits 0 if the URL has any ID overlap with an existing
|
|
24
|
+
platform='linkedin' row, 1 otherwise. Prints JSON with {url, ids, engaged} plus match on a hit.
|
|
25
|
+
python3 scripts/linkedin_url.py --check-self-author URL_OR_SLUG
|
|
26
|
+
Exits 0 if the author profile URL/slug matches one of our own
|
|
27
|
+
LinkedIn accounts (we should never comment on our own posts).
|
|
28
|
+
Exits 1 otherwise. Prints JSON with {input, slug, self}.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import argparse
|
|
32
|
+
import json
|
|
33
|
+
import os
|
|
34
|
+
import re
|
|
35
|
+
import sys
|
|
36
|
+
import urllib.parse
|
|
37
|
+
|
|
38
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
39
|
+
from http_api import api_get
|
|
40
|
+
|
|
41
|
+
# Any 16-19 digit run delimited by word boundaries. Note that "_" counts as
# a word character, so a digit run directly adjacent to "_" is not matched.
ID_RE = re.compile(r"\b(\d{16,19})\b")
# An explicit activity URN (case-insensitive); group 1 is the numeric ID.
ACTIVITY_URN_RE = re.compile(r"urn:li:activity:(\d{16,19})", re.IGNORECASE)

# LinkedIn public profile slugs we own. Author URL match against this set
# means "this is our own post; skip". Add any future account here.
# Entries are compared against the lowercase output of extract_slug().
SELF_LINKEDIN_SLUGS = {"m13v"}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def extract_slug(author_url_or_slug):
    """Pull the public profile slug from a LinkedIn author identifier.

    Accepts any of:
        'https://www.linkedin.com/in/m13v/'
        'https://www.linkedin.com/in/m13v'
        '/in/m13v/'
        'm13v'

    The input is stripped, percent-decoded, lowercased, and has trailing
    slashes removed before matching. Slug characters are matched
    Unicode-aware (letters, digits, '_' and '-'), so a percent-encoded
    non-ASCII slug such as '/in/m13v%C3%A9' is returned whole ('m13vé')
    rather than being cut at the first non-ASCII character, which would
    make it indistinguishable from the ASCII slug 'm13v'.

    Returns the lowercase slug, or '' if nothing parseable.
    """
    if not author_url_or_slug:
        return ""
    s = urllib.parse.unquote(author_url_or_slug.strip()).lower().rstrip("/")
    # For str patterns \w is Unicode-aware; after lower() it covers the
    # ASCII set a-z, 0-9 and '_' plus non-ASCII letters and digits.
    m = re.search(r"/in/([\w\-]+)", s)
    if m:
        return m.group(1)
    # No '/in/' segment: accept the input only if it is itself a bare slug.
    if re.fullmatch(r"[\w\-]+", s):
        return s
    return ""
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def is_self_author(author_url_or_slug):
    """Report whether an author URL/slug belongs to one of our accounts.

    The identifier is normalized with extract_slug() and looked up in
    SELF_LINKEDIN_SLUGS. Used to skip posts authored by us during pipeline
    discovery.
    """
    slug = extract_slug(author_url_or_slug)
    return slug in SELF_LINKEDIN_SLUGS
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def extract_ids(url):
    """Collect the distinct 16-19 digit IDs in a URL, in first-seen order.

    The URL is percent-decoded first, so an encoded value such as
    commentUrn=urn%3Ali%3Aactivity%3A... contributes its IDs as well. IDs
    are picked up wherever ID_RE matches, whether in the path or the query
    string (activity, share, ugcPost and comment URNs alike).

    Returns an empty list for a falsy URL.
    """
    if not url:
        return []
    text = urllib.parse.unquote(url)
    # dict keys keep insertion order, so fromkeys() drops repeats while
    # preserving the order of first occurrence.
    return list(dict.fromkeys(ID_RE.findall(text)))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def canonicalize(url):
    """Normalize a LinkedIn URL to a stable form for the our_url column.

    If the percent-decoded URL contains an explicit activity URN, return
    the /feed/update/urn:li:activity:<id>/ permalink for it. Otherwise
    return the original URL with its query string and fragment removed,
    which keeps /posts/... slugs stable while dropping tracking params.
    A falsy URL is returned unchanged.
    """
    if not url:
        return url
    urn_match = ACTIVITY_URN_RE.search(urllib.parse.unquote(url))
    if urn_match is not None:
        activity_id = urn_match.group(1)
        return f"https://www.linkedin.com/feed/update/urn:li:activity:{activity_id}/"
    # Fallback: rebuild from scheme/host/path only.
    scheme, netloc, path, _query, _fragment = urllib.parse.urlsplit(url)
    return urllib.parse.urlunsplit((scheme, netloc, path, "", ""))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def find_existing_engagement(ids):
    """Look up an existing posts row that overlaps any of the given IDs.

    The IDs are sent comma-joined to GET /api/v1/linkedin-engaged?ids=...
    Per the migration note (2026-06-01), the collision query (posts.urns
    overlap plus thread_url / our_url substring fallback) runs server-side,
    so no DATABASE_URL is needed here. Checking every URN form matters
    because the same logical post can surface under different URNs: the
    search-page DOM may expose the ugcPost URN while our DB stored only the
    activity URN.

    Returns None when ids is empty or the response is not flagged
    "engaged"; otherwise returns the response's "match" value (described
    upstream as a dict with keys post_id, posted_at, thread_url, our_url,
    our_account).
    """
    if not ids:
        return None
    response = api_get("/api/v1/linkedin-engaged", {"ids": ",".join(ids)})
    payload = response.get("data") or {}
    if payload.get("engaged"):
        return payload.get("match")
    return None
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def get_engaged_ids():
    """Return a sorted list of every LinkedIn ID we have engaged with.

    Fetches the raw (thread_url, our_url) pairs from
    GET /api/v1/linkedin-engaged?list_urls=1 and runs extract_ids() over
    both columns of each row, so ID extraction stays single-sourced in
    Python rather than being re-implemented as a Postgres regexp. Used to
    brief the LLM in run-linkedin.sh.
    """
    response = api_get("/api/v1/linkedin-engaged", {"list_urls": 1})
    url_rows = (response.get("data") or {}).get("urls") or []
    # A missing or null column is treated as an empty URL (no IDs).
    engaged = {
        linkedin_id
        for url_row in url_rows
        for column in ("thread_url", "our_url")
        for linkedin_id in extract_ids(url_row.get(column) or "")
    }
    return sorted(engaged)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def main():
    """CLI entry point: perform the first requested action, then finish.

    Options are tested in a fixed order (--extract, --canonicalize,
    --check-engaged, --check-engaged-ids, --check-self-author,
    --list-engaged-ids) and only the first one with a truthy value is
    acted on. The --check-* actions print JSON and exit 0 on a hit, 1
    otherwise. When no action is given, help is printed and the process
    exits with status 2.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--extract", help="Print all IDs found in URL")
    cli.add_argument("--canonicalize", help="Print the canonical form of URL")
    cli.add_argument(
        "--check-engaged",
        help="Check if URL collides with any "
             "existing linkedin row. Exits 0 on collision, 1 otherwise.",
    )
    cli.add_argument(
        "--check-engaged-ids",
        help="Comma- or whitespace-separated "
             "list of LinkedIn URN IDs (16-19 digits each) extracted "
             "from a candidate post's DOM. Pre-comment dedup primary path: "
             "the URL bar may only carry the share URN while our DB rows "
             "store the activity URN, so the browser-side script must "
             "walk componentkey/data-testid for ALL URNs and pipe them in. "
             "Exits 0 on collision, 1 otherwise.",
    )
    cli.add_argument(
        "--list-engaged-ids",
        action="store_true",
        help="Print every linkedin ID we've engaged with, one per line.",
    )
    cli.add_argument(
        "--check-self-author",
        help="Author profile URL or "
             "public-ID slug from a candidate post. Exits 0 if it "
             "matches one of our own LinkedIn accounts (skip the "
             "post), 1 otherwise (proceed). Pre-comment guard so "
             "the pipeline doesn't comment on Matthew's own posts "
             "when search results surface them.",
    )
    opts = cli.parse_args()

    if opts.extract:
        print(json.dumps(extract_ids(opts.extract)))
        return

    if opts.canonicalize:
        print(canonicalize(opts.canonicalize))
        return

    if opts.check_engaged:
        candidate_ids = extract_ids(opts.check_engaged)
        hit = find_existing_engagement(candidate_ids)
        report = {
            "url": opts.check_engaged,
            "ids": candidate_ids,
            "engaged": bool(hit),
        }
        if hit:
            report["match"] = hit
        print(json.dumps(report, indent=2))
        sys.exit(0 if hit else 1)

    if opts.check_engaged_ids:
        # Accept comma, whitespace, or newline separation. Keep only 16-19
        # digit numeric tokens so ad campaign mcid values or other noise
        # picked up by the browser-side walker are dropped.
        tokens = re.split(r"[,\s]+", opts.check_engaged_ids.strip())
        candidate_ids = [
            tok for tok in tokens if re.fullmatch(r"\d{16,19}", tok or "")
        ]
        hit = find_existing_engagement(candidate_ids)
        report = {"ids": candidate_ids, "engaged": bool(hit)}
        if hit:
            report["match"] = hit
        print(json.dumps(report, indent=2))
        sys.exit(0 if hit else 1)

    if opts.check_self_author:
        slug = extract_slug(opts.check_self_author)
        is_ours = slug in SELF_LINKEDIN_SLUGS
        verdict = {
            "input": opts.check_self_author,
            "slug": slug,
            "self": is_ours,
        }
        print(json.dumps(verdict))
        sys.exit(0 if is_ours else 1)

    if opts.list_engaged_ids:
        for linkedin_id in get_engaged_ids():
            print(linkedin_id)
        return

    # No recognized action was requested.
    cli.print_help()
    sys.exit(2)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|