@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan replies table for users worth DMing across all platforms.
|
|
3
|
+
|
|
4
|
+
Criteria for DM candidates:
|
|
5
|
+
- User replied to our post/comment with a substantive comment (status='replied', meaning we already engaged publicly)
|
|
6
|
+
- We haven't already DM'd this user for this reply
|
|
7
|
+
- User isn't in exclusion list
|
|
8
|
+
- Comment has enough substance (at least 4 words on Reddit and X, 10 on LinkedIn) to continue the conversation
|
|
9
|
+
- Not a bot or deleted account
|
|
10
|
+
- Our public reply is recent enough (last 7 days by default) but at least 5 hours old (post-reply cooldown)
|
|
11
|
+
|
|
12
|
+
Supports: Reddit, LinkedIn, Twitter/X
|
|
13
|
+
|
|
14
|
+
Usage:
|
|
15
|
+
python3 scripts/scan_dm_candidates.py [--dry-run] [--max N] [--platform reddit|linkedin|x|all]
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import sys
|
|
22
|
+
from datetime import datetime, timedelta, timezone
|
|
23
|
+
|
|
24
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
25
|
+
import db as dbmod
|
|
26
|
+
from project_topics import topics_for_project
|
|
27
|
+
|
|
28
|
+
# JSON config file read by load_config(); "~" is expanded once, at import time.
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")
# Min-word floor to promote a public reply into a DM candidate.
# X replies are natively shorter (quote-tweets, @-mentions), so the bar is lower.
# Reddit floor lowered to 4 on 2026-04-21 after data showed 4-9 word Reddit
# replies are often direct questions and strong opinions, not filler; the
# previous 10-word floor was leaving ~66 eligible candidates/30d on the table.
MIN_WORDS_BY_PLATFORM = {"reddit": 4, "linkedin": 10, "x": 4}
# Floor used for any platform that has no entry in MIN_WORDS_BY_PLATFORM.
MIN_WORDS_DEFAULT = 10
# Wait this long after our public reply before DMing, so the DM doesn't
# feel like a double-tap on the same day. Next scan picks it up.
POST_REPLY_COOLDOWN_HOURS = 5
# Default look-back window in days (applied to replies.replied_at) when the
# caller does not pass max_age_days.
MAX_AGE_DAYS = 7
# NOTE(review): not referenced in the visible part of this file; presumably the
# default for the --max CLI option -- confirm against the argument parser.
DEFAULT_MAX_CANDIDATES = 100
# Canonical platform names. Twitter is spelled "x" here; the scan functions
# rewrite a caller-supplied "twitter" to "x".
PLATFORMS = ["reddit", "linkedin", "x"]
|
|
42
|
+
|
|
43
|
+
# Skip reasons that mean "this person can never receive a DM from us, ever".
# These are recipient-side blocks (DMs disabled, suspended, company page,
# inmail credits exhausted) or competitor disqualifications. Stored as ILIKE
# patterns; matched against the dms.skip_reason column to permanently exclude
# an author from future rescans (no 30-day window). Anything not matched here
# (low_value, hostile, thin_conversation, etc.) is treated as transient and
# the user can be re-promoted later.
#
# Matching notes: the tuple is passed to `ILIKE ANY(%s)`, so comparison is
# case-insensitive, "%" matches any run of characters and "_" matches any
# single character (so "chat_disabled%" also matches e.g. "chat disabled...").
# A pattern without a leading "%" only matches at the start of skip_reason.
# The same tuple is sent verbatim to the discover endpoint by
# scan_platform_http(), so entries and their order are left untouched here.
#
# NOTE(review): "%user has DMs disabled%" is already covered by
# "%DMs disabled%", and "unable to send a message request%" by
# "%unable to send a message request%". Redundant but harmless.
PERMANENT_SKIP_REASON_PATTERNS = (
    "chat_disabled%",
    "dms_closed%",
    "cannot_send_dms_disabled%",
    "%DMs disabled%",
    "%has DMs closed%",
    "%user has DMs disabled%",
    "%DMs not open%",
    "not_following_no_dm_access%",
    "x_requires_premium_to_dm_non_followers%",
    "%requires verified/premium%",
    "%requires X verification/premium%",
    "%only verified users can send DM%",
    "not_connected_cannot_dm%",
    "not_connected_3rd_degree_cant_message%",
    "not_connected_inmail_credits_exhausted%",
    "requires_inmail_credit%",
    "requires_inmail_no_credits%",
    "no_inmail_credits%",
    "not_1st_connection_no_inmail_credits%",
    "3rd_plus_connection_cannot_dm%",
    "messaging_restricted%",
    "%InMail credits exhausted%",
    "%InMail credits are depleted%",
    "company_page%",
    "company page%",
    "%company page, cannot DM%",
    "account_suspended%",
    "%account is suspended%",
    "cannot_identify_correct_profile%",
    "encrypted_dm_passcode_required%",
    "x_encrypted_dm_passcode_required%",
    "disqualified:%",
    "unable to send a message request%",
    "%unable to send a message request%",
    "send_button_disabled%",
)
|
|
87
|
+
|
|
88
|
+
# Transient failure patterns: infrastructure errors (browser profile lock
# contention, MCP wrapper death, Playwright launch failures, verify-failed
# sends) that should NOT permanently block a candidate. A dms row in
# status='error' (or 'skipped' tagged with one of these) gets:
#   (a) treated as non-blocking in the discover LEFT JOIN below, so the
#       reply re-appears as a candidate, AND
#   (b) reverted to status='pending' via ON CONFLICT DO UPDATE when
#       re-inserted (see scan_platform).
# Self-heals the 2026-05-12 "7 warm leads burned by twitter_agent_mcp_unavailable"
# regression at the source. Without this, the LEFT JOIN d.id IS NULL filter
# permanently blocked any reply that ever had a transient-error dms row.
#
# Matched with `ILIKE ANY(%s)` against dms.skip_reason: case-insensitive,
# "%" = any run of characters, "_" = any single character. Patterns without a
# leading "%" only match at the start of skip_reason, which is why the
# per-platform "<platform>_agent_mcp_unavailable%" entries are listed
# separately from "mcp_unavailable%".
TRANSIENT_SKIP_REASON_PATTERNS = (
    "twitter_agent_mcp_unavailable%",
    "reddit_agent_mcp_unavailable%",
    "linkedin_agent_mcp_unavailable%",
    "mcp_unavailable%",
    "%mcp server not connected%",
    "%no mcp tools%",
    "%MCP server not registered%",
    "send_unverified%",
    "%browser launch failed%",
    "%profile locked by another process%",
    "%chromium profile locked%",
    "%target page, context or browser has been closed%",
    "%playwright%timeout%",
    "%SIGTRAP%",
    "%transient_browser_failure%",
)
|
|
116
|
+
|
|
117
|
+
def load_config():
    """Load the autoposter configuration from CONFIG_PATH.

    Returns:
        The parsed JSON document, or an empty dict when CONFIG_PATH does
        not exist.

    Raises:
        json.JSONDecodeError: if the file exists but does not contain
            valid JSON.
    """
    if os.path.exists(CONFIG_PATH):
        # JSON is UTF-8 by specification. Without an explicit encoding,
        # open() falls back to the locale codec, which mis-decodes (or
        # rejects) non-ASCII config values on non-UTF-8 locales.
        with open(CONFIG_PATH, encoding="utf-8") as f:
            return json.load(f)
    return {}
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def word_count(text):
    """Count the whitespace-separated tokens in ``text``.

    Falsy input (``None`` or an empty string) counts as zero words.
    """
    if not text:
        return 0
    tokens = text.split()
    return len(tokens)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def build_project_topic_index(config, platform):
    """Collect each configured project's topic phrases for topic matching.

    Topics come from ``topics_for_project`` (the unified search_topics list,
    post 2026-04-30 legacy cleanup). ``platform`` is not consulted; it is
    kept so existing call sites keep working.

    Returns:
        A list of ``(project_name, [lowercased phrase, ...])`` tuples in
        config order. Projects with neither a name nor an id, and projects
        with no usable phrases, are omitted.
    """
    index = []
    for project in config.get("projects", []) or []:
        label = project.get("name") or project.get("id")
        if not label:
            continue
        # Keep only non-blank strings, trimmed and lowercased.
        cleaned = [
            topic.strip().lower()
            for topic in topics_for_project(label)
            if isinstance(topic, str) and topic.strip()
        ]
        if cleaned:
            index.append((label, cleaned))
    return index
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def infer_target_project(text_parts, project_topic_index):
    """Return the project whose topics overlap most with the given text.

    Args:
        text_parts: iterable of strings (falsy entries are ignored) that are
            joined with spaces and lowercased before matching.
        project_topic_index: ``[(project_name, [lowercased phrase, ...]), ...]``
            as produced by ``build_project_topic_index``.

    Returns:
        The name of the best-scoring project, or ``None`` when the text is
        empty or no phrase matches. On a tie the earliest project in the
        index wins.

    Scoring: a multi-word phrase found anywhere in the text is worth 2; a
    single-word phrase found as a whole word is worth 1. Each phrase counts
    at most once per project.
    """
    import re  # local import: `re` is not imported at module level

    blob = " ".join(t for t in text_parts if t).lower()
    if not blob:
        return None

    best_name, best_score = None, 0
    for name, phrases in project_topic_index:
        score = 0
        for phrase in phrases:
            if not phrase:
                continue
            if " " in phrase:
                if phrase in blob:
                    score += 2
            # Whole-word match. The previous space-padded substring test
            # missed words adjacent to punctuation or a newline/tab
            # ("automation.", "automation\n"). The lookarounds accept every
            # case the old test accepted while still rejecting partial words
            # such as "automations".
            elif re.search(rf"(?<!\w){re.escape(phrase)}(?!\w)", blob):
                score += 1
        if score > best_score:
            best_score = score
            best_name = name
    return best_name if best_score > 0 else None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def upsert_prospect_row(conn, platform, author):
    """Make sure a prospects row exists for (platform, author) and return its id.

    The INSERT is a no-op when the pair already exists (ON CONFLICT DO
    NOTHING), so the id is always read back with a separate SELECT.
    Returns ``None`` if that SELECT yields no row.
    """
    key = (platform, author)
    conn.execute(
        """
        INSERT INTO prospects (platform, author)
        VALUES (%s, %s)
        ON CONFLICT ON CONSTRAINT prospects_platform_author_unique DO NOTHING
        """,
        key,
    )
    found = conn.execute(
        "SELECT id FROM prospects WHERE platform=%s AND author=%s",
        key,
    ).fetchone()
    if not found:
        return None
    return found["id"]
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def get_excluded_authors(config, platform):
    """Build the set of lowercased author names to exclude on ``platform``.

    Sources, all lowercased:
      * ``config["exclusions"]["authors"]`` plus the built-ins
        "automoderator" and "[deleted]" (every platform);
      * our own account for the platform (reddit username, linkedin name,
        twitter handle without a leading "@");
      * ``exclusions.linkedin_profiles`` (linkedin) or
        ``exclusions.twitter_accounts`` (x);
      * handles with severity "hard" from GET /api/v1/blocklist.

    Args:
        config: parsed config dict. Missing or null sections are treated
            as empty.
        platform: "reddit", "linkedin" or "x".

    Returns:
        A set of lowercased author identifiers.
    """
    # `or {}` / `or []`: a JSON null section must behave like a missing one
    # instead of raising AttributeError on None.
    exclusions = config.get("exclusions") or {}
    accounts = config.get("accounts") or {}

    excluded = {a.lower() for a in exclusions.get("authors") or []}
    excluded.add("automoderator")
    excluded.add("[deleted]")

    if platform == "reddit":
        reddit_account = (accounts.get("reddit") or {}).get("username") or ""
        if reddit_account:
            excluded.add(reddit_account.lower())
    elif platform == "linkedin":
        linkedin_name = (accounts.get("linkedin") or {}).get("name") or ""
        if linkedin_name:
            excluded.add(linkedin_name.lower())
        for p in exclusions.get("linkedin_profiles") or []:
            excluded.add(p.lower())
    elif platform == "x":
        twitter_handle = ((accounts.get("twitter") or {}).get("handle") or "").lstrip("@")
        if twitter_handle:
            excluded.add(twitter_handle.lower())
        for t in exclusions.get("twitter_accounts") or []:
            # Keep the entry as written (previous behaviour) and also add the
            # bare handle. Our own handle and the blocklist handles are
            # "@"-stripped, so an exclusion written as "@name" would
            # otherwise never match an author stored as "name".
            excluded.add(t.lower())
            bare = t.strip().lstrip("@").lower()
            if bare:
                excluded.add(bare)

    # Dynamic exclusion list: fold in author_blocklist HARD handles for this
    # platform (the config above covers static entries; this adds runtime blocks
    # added via reply_db.py or the velocity gate). Fail-open: a website hiccup
    # never breaks scanning; on failure only the static exclusions apply.
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        # Guarded so repeated calls do not keep growing sys.path.
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)
        from http_api import api_get
        _resp = api_get("/api/v1/blocklist", query={"platform": platform},
                        ok_on_404=True)
        for _r in (((_resp or {}).get("data") or {}).get("rows") or []):
            _h = (_r.get("handle") or "").strip().lstrip("@").lower()
            if _h and _r.get("severity") == "hard":
                excluded.add(_h)
    except Exception as exc:
        # Still best-effort, but leave a trace on stderr: a silently missing
        # hard blocklist means blocked handles can be queued for DMs.
        print(f"[scan_dm_candidates] blocklist fetch failed for {platform}: {exc!r}",
              file=sys.stderr)

    return excluded
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def scan_platform(conn, config, platform, max_candidates, dry_run, max_age_days=None):
    """Scan one platform's replies and queue eligible authors as pending DMs.

    Args:
        conn: database connection. Must provide ``execute(sql, params)``
            returning a cursor whose ``fetchall()``/``fetchone()`` yield
            mapping-style rows, plus ``commit()``.
        config: parsed config dict (see ``load_config``).
        platform: "reddit", "linkedin" or "x"; "twitter" is accepted and
            canonicalized to "x".
        max_candidates: stop once this many candidates have been queued
            (or printed, in dry-run mode).
        dry_run: when true, print the candidates and write nothing.
        max_age_days: look-back window in days; ``None`` means MAX_AGE_DAYS.

    Returns:
        The number of candidates queued (dry-run: that would be queued).
    """
    # Canonicalize Twitter as 'x' (the dms/replies/posts tables use 'x'; some
    # callers historically passed 'twitter'). Without this, dedupe leaks across
    # the two names and the same person can be re-queued.
    if platform == "twitter":
        platform = "x"
    excluded = get_excluded_authors(config, platform)
    topic_index = build_project_topic_index(config, platform)
    age_days = max_age_days if max_age_days is not None else MAX_AGE_DAYS

    # Multi-account scoping (Twitter only): when this machine has a Twitter
    # handle configured, only surface candidates from replies on posts THIS
    # account made. Without this, a VM running as @matt_diak would discover
    # DM candidates from @m13v_'s public reply threads and propose outreach
    # about conversations the wrong account had. The other platforms
    # (reddit, linkedin) don't yet have multi-machine fanout, so they fall
    # through unscoped. Treat the filter as additive: NULL handle == legacy
    # unscoped behavior.
    # Shared with scan_platform_http() so both paths resolve it identically.
    twitter_handle = _resolve_twitter_handle_for(platform)

    # The look-back window is bound as a number multiplied by a fixed
    # interval, the same form as the cooldown line below it. A placeholder
    # inside a quoted literal (INTERVAL '%s days') only works when the driver
    # substitutes text client-side and the value happens to render without
    # quotes.
    candidates = conn.execute("""
        SELECT r.id as reply_id, r.post_id, r.platform, r.their_author, r.their_content,
               r.their_comment_url, r.depth,
               r.our_reply_content, r.our_reply_url,
               p.thread_title, p.our_content as our_post_content,
               p.thread_url, p.our_url, p.project_name as post_project,
               r.replied_at
        FROM replies r
        JOIN posts p ON r.post_id = p.id
        LEFT JOIN dms d
               ON d.reply_id = r.id
              AND d.platform = %s
              -- Ignore transient-failure rows when deciding "already has a DM entry".
              -- A reply whose ONLY dms row is e.g. status='error' with
              -- skip_reason='twitter_agent_mcp_unavailable: ...' passes through
              -- this join as d.id IS NULL and gets re-discovered. The ON CONFLICT
              -- DO UPDATE in the INSERT below then flips that row back to pending.
              AND NOT (d.status IN ('error','skipped')
                       AND COALESCE(d.skip_reason,'') ILIKE ANY(%s))
        WHERE r.status = 'replied'
          AND r.platform = %s
          AND r.our_reply_content IS NOT NULL
          AND r.our_reply_content != ''
          AND d.id IS NULL
          AND r.replied_at >= NOW() - (INTERVAL '1 day' * %s)
          AND r.replied_at <= NOW() - (INTERVAL '1 hour' * %s)
          AND (%s::text IS NULL OR p.our_account = %s)
        ORDER BY r.replied_at DESC
    """, (platform, list(TRANSIENT_SKIP_REASON_PATTERNS), platform, age_days,
          POST_REPLY_COOLDOWN_HOURS, twitter_handle, twitter_handle)).fetchall()

    inserted = 0
    skipped_reasons = {}
    # Authors already queued during THIS scan. In a real run the pending-row
    # query below catches them as well; in dry-run nothing is inserted, so
    # without this set one author with N replies was reported N times.
    queued_authors = set()

    def _skip(reason):
        """Tally one skipped reply under ``reason``."""
        skipped_reasons[reason] = skipped_reasons.get(reason, 0) + 1

    for row in candidates:
        if inserted >= max_candidates:
            break

        author = row["their_author"] or ""
        content = row["their_content"] or ""

        # A reply without an author has nobody to DM; queuing it would only
        # create a blank prospects/dms row.
        if not author.strip():
            _skip("missing_author")
            continue

        # Skip excluded authors
        if author.lower() in excluded:
            _skip("excluded_author")
            continue

        # Skip low-substance comments (platform-specific floor)
        min_words = MIN_WORDS_BY_PLATFORM.get(platform, MIN_WORDS_DEFAULT)
        if word_count(content) < min_words:
            _skip("too_short")
            continue

        # Same author already queued earlier in this scan (see queued_authors).
        if author in queued_authors:
            _skip("duplicate_pending_author")
            continue

        # Dedupe: don't re-promote a candidate if either
        # (a) we sent/queued a REAL private DM (chat_url IS NOT NULL) in the
        #     last 30 days, OR
        # (b) we permanently can't (or shouldn't) DM them based on a prior
        #     skip/error (chat_disabled, account_suspended, disqualified,
        #     inmail credits exhausted, etc. — see PERMANENT_SKIP_REASON_PATTERNS)
        #
        # NOTE 2026-05-13: the recent_active branch REQUIRES chat_url IS NOT NULL.
        # The dms table is also used as a unified prospect-tracker: dm_conversation.
        # ensure-dm inserts rows with status='sent' + chat_url=NULL after every
        # public reply (engage_reddit hook), which previously self-poisoned the
        # cooldown — every public-comment author looked "already_dmd_recently"
        # even though no real DM was ever sent. Result: real DM outreach collapsed
        # from ~100-225/wk pre-Apr 27 to 0 by mid-May 2026. The chat_url IS NOT
        # NULL filter restores the intended semantics: cool down on actual DM
        # delivery, not on public engagement bookkeeping.
        recent_dm = conn.execute("""
            SELECT
                SUM(CASE WHEN status IN ('sent','pending')
                          AND chat_url IS NOT NULL
                          AND discovered_at >= NOW() - INTERVAL '30 days'
                         THEN 1 ELSE 0 END) AS recent_active,
                SUM(CASE WHEN status IN ('skipped','error')
                          AND COALESCE(skip_reason,'') ILIKE ANY(%s)
                         THEN 1 ELSE 0 END) AS permanent_block
            FROM dms
            WHERE their_author = %s AND platform = %s
        """, (list(PERMANENT_SKIP_REASON_PATTERNS), author, platform)).fetchone()

        if recent_dm and (recent_dm["recent_active"] or 0) > 0:
            _skip("already_dmd_recently")
            continue
        if recent_dm and (recent_dm["permanent_block"] or 0) > 0:
            _skip("permanently_unreachable_or_disqualified")
            continue

        # Reject if there's already a pending DM for this (platform, author).
        # The existing ON CONFLICT (platform, their_author, reply_id) only blocks
        # re-inserting the SAME comment. When one author has N matched comments
        # (e.g. Economy_Leopard112 with 7 replies → Terminator on 2026-05-13),
        # the scanner used to queue N separate pending DM rows. If the pipeline
        # ever sent, that person would get N DMs back-to-back. Account-killer.
        existing_pending = conn.execute("""
            SELECT 1 FROM dms
            WHERE platform = %s AND their_author = %s AND status = 'pending'
            LIMIT 1
        """, (platform, author)).fetchone()
        if existing_pending:
            _skip("duplicate_pending_author")
            continue

        # Build comment context for the DM
        context = f"Thread: {row['thread_title'] or 'N/A'}\n"
        context += f"Their comment: {content}\n"
        context += f"Our reply: {(row['our_reply_content'] or '')}"

        # Pick target_project: inherit from post; fall back to topic match.
        target_project = row["post_project"]
        if not target_project:
            target_project = infer_target_project(
                [row["thread_title"], content, row["our_reply_content"]],
                topic_index,
            )

        if dry_run:
            print(f" [{platform}] CANDIDATE: {author} (reply #{row['reply_id']}) target={target_project}")
            print(f" Their comment: {content[:100]}...")
            print(f" Our reply: {(row['our_reply_content'] or '')[:100]}...")
            print()
            queued_authors.add(author)
            inserted += 1
            continue

        prospect_id = upsert_prospect_row(conn, platform, author)

        # ON CONFLICT DO UPDATE (added 2026-05-13): when a row already exists for
        # this (platform, their_author, reply_id) but its status is a transient
        # error/skipped (twitter_agent_mcp_unavailable, send_unverified,
        # chromium profile locked, etc.), revert it back to status='pending' so
        # the next outreach run picks it up. Non-transient rows (sent, real
        # pending, permanent chat_disabled, disqualified) are left untouched by
        # the WHERE clause. Second prong of the self-heal mechanism, paired
        # with the relaxed LEFT JOIN above.
        conn.execute("""
            INSERT INTO dms (platform, reply_id, post_id, their_author, their_content,
                             comment_context, status, prospect_id, target_project)
            VALUES (%s, %s, %s, %s, %s, %s, 'pending', %s, %s)
            ON CONFLICT (platform, their_author, reply_id) DO UPDATE
                SET status = 'pending',
                    skip_reason = NULL,
                    claude_session_id = NULL,
                    discovered_at = NOW(),
                    target_project = EXCLUDED.target_project,
                    comment_context = EXCLUDED.comment_context
                WHERE dms.status IN ('error','skipped')
                  AND COALESCE(dms.skip_reason,'') ILIKE ANY(%s)
        """, (platform, row["reply_id"], row["post_id"], author, content, context,
              prospect_id, target_project, list(TRANSIENT_SKIP_REASON_PATTERNS)))
        # Commit per candidate so the pending-author check on later rows
        # (and any concurrent scanner) sees this row.
        conn.commit()
        queued_authors.add(author)
        inserted += 1
        print(f" [{platform}] NEW DM candidate: {author} (reply #{row['reply_id']}) "
              f"target={target_project or '-'}: {content[:70]}...")

    if skipped_reasons:
        skip_summary = ", ".join(f"{k}={v}" for k, v in skipped_reasons.items())
        print(f" [{platform}] Skipped: {skip_summary}")

    return inserted
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def _resolve_twitter_handle_for(platform):
|
|
425
|
+
"""Same x-only multi-account scoping scan_platform() uses. None elsewhere."""
|
|
426
|
+
if platform != "x":
|
|
427
|
+
return None
|
|
428
|
+
try:
|
|
429
|
+
from twitter_account import resolve_handle as _resolve_twitter_handle
|
|
430
|
+
return _resolve_twitter_handle()
|
|
431
|
+
except Exception:
|
|
432
|
+
return None
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def scan_platform_http(config, platform, max_candidates, dry_run, max_age_days=None):
    """Discover and queue DM candidates for one platform over the HTTP API.

    DB-free twin of scan_platform(). Candidate discovery (the JOIN and the
    per-author dedup signals) runs server-side via
    POST /api/v1/dm-candidates/discover; the transient/permanent skip-reason
    pattern lists owned by this module are sent in the request body. The
    config-driven filters (excluded authors, minimum word count,
    target-project inference) and the max-candidates cap are applied here,
    client-side. Accepted candidates are written through
    POST /api/v1/prospects followed by POST /api/v1/dm-candidates.

    Args:
        config: Loaded configuration, passed to get_excluded_authors() and
            build_project_topic_index().
        platform: Platform name; "twitter" is normalised to "x".
        max_candidates: Stop once this many candidates have been accepted.
        dry_run: When true, print candidates instead of posting them.
        max_age_days: Optional override for MAX_AGE_DAYS.

    Returns:
        Number of candidates accepted (printed in dry-run mode, otherwise
        posted to the API).
    """
    from http_api import api_post

    if platform == "twitter":
        platform = "x"

    excluded = get_excluded_authors(config, platform)
    topic_index = build_project_topic_index(config, platform)
    age_days = MAX_AGE_DAYS if max_age_days is None else max_age_days
    twitter_handle = _resolve_twitter_handle_for(platform)

    discover_body = {
        "platform": platform,
        "age_days": age_days,
        "cooldown_hours": POST_REPLY_COOLDOWN_HOURS,
        "twitter_handle": twitter_handle,
        "transient_patterns": list(TRANSIENT_SKIP_REASON_PATTERNS),
        "permanent_patterns": list(PERMANENT_SKIP_REASON_PATTERNS),
        "limit": 2000,
    }
    resp = api_post("/api/v1/dm-candidates/discover", discover_body)
    payload = resp.get("data") or {}
    candidates = payload.get("candidates") or []

    # Per-row counters returned by the discover endpoint, checked in this
    # order; a positive value means the author must be skipped.
    server_side_flags = (
        ("recent_active", "already_dmd_recently"),
        ("permanent_block", "permanently_unreachable_or_disqualified"),
        ("existing_pending", "duplicate_pending_author"),
    )
    # The word floor depends only on the platform, so look it up once.
    min_words = MIN_WORDS_BY_PLATFORM.get(platform, MIN_WORDS_DEFAULT)

    inserted = 0
    skipped_reasons = {}

    for row in candidates:
        if inserted >= max_candidates:
            break

        author = row.get("their_author") or ""
        content = row.get("their_content") or ""

        # Client-side filters first, then the server-computed dedup flags.
        if author.lower() in excluded:
            skip = "excluded_author"
        elif word_count(content) < min_words:
            skip = "too_short"
        else:
            skip = next(
                (label for key, label in server_side_flags if (row.get(key) or 0) > 0),
                None,
            )
        if skip is not None:
            skipped_reasons.setdefault(skip, 0)
            skipped_reasons[skip] += 1
            continue

        thread_title = row.get("thread_title")
        our_reply = row.get("our_reply_content")
        reply_id = row.get("reply_id")

        context = "\n".join(
            (
                f"Thread: {thread_title or 'N/A'}",
                f"Their comment: {content}",
                f"Our reply: {(our_reply or '')}",
            )
        )

        # Prefer the project recorded on the post; otherwise infer it from
        # the thread title, their comment and our reply.
        target_project = row.get("post_project")
        if not target_project:
            target_project = infer_target_project(
                [thread_title, content, our_reply],
                topic_index,
            )

        if dry_run:
            print(f" [{platform}] CANDIDATE: {author} (reply #{reply_id}) target={target_project}")
            print(f" Their comment: {content[:100]}...")
            print(f" Our reply: {(our_reply or '')[:100]}...")
            print()
        else:
            prospect = api_post("/api/v1/prospects", {"platform": platform, "author": author})
            prospect_data = prospect.get("data") or {}
            prospect_row = prospect_data.get("prospect") or {}
            prospect_id = prospect_row.get("id")

            api_post(
                "/api/v1/dm-candidates",
                {
                    "platform": platform,
                    "reply_id": reply_id,
                    "post_id": row.get("post_id"),
                    "their_author": author,
                    "their_content": content,
                    "comment_context": context,
                    "prospect_id": prospect_id,
                    "target_project": target_project,
                    "transient_patterns": list(TRANSIENT_SKIP_REASON_PATTERNS),
                },
            )
            print(f" [{platform}] NEW DM candidate: {author} (reply #{reply_id}) "
                  f"target={target_project or '-'}: {content[:70]}...")

        inserted += 1

    if skipped_reasons:
        skip_summary = ", ".join(f"{k}={v}" for k, v in skipped_reasons.items())
        print(f" [{platform}] Skipped: {skip_summary}")

    return inserted
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def main():
    """Parse CLI options, scan each requested platform, and report the total.

    Returns:
        Total number of DM candidates found (dry run) or queued across all
        scanned platforms.
    """
    cli = argparse.ArgumentParser(description="Find users worth DMing based on comment engagement")
    cli.add_argument("--dry-run", action="store_true", help="Print candidates without inserting")
    cli.add_argument("--max", type=int, default=DEFAULT_MAX_CANDIDATES, help="Max candidates per platform")
    cli.add_argument(
        "--platform",
        default="all",
        choices=PLATFORMS + ["all"],
        help="Platform to scan (default: all)",
    )
    cli.add_argument(
        "--days",
        type=int,
        default=None,
        help=f"Override MAX_AGE_DAYS (default {MAX_AGE_DAYS}). Use for one-shot backfills after threshold changes.",
    )
    opts = cli.parse_args()

    config = load_config()
    # NOTE(review): presumably loads environment settings needed by the HTTP
    # client — confirm against dbmod.
    dbmod.load_env()

    if opts.platform == "all":
        platforms = PLATFORMS
    else:
        platforms = [opts.platform]

    # HTTP-only: discovery + insert run server-side via the s4l.ai HTTP API.
    # The direct-Postgres lane was removed 2026-06-01 — there is no DB path and
    # no fallback. DATABASE_URL, if present, is ignored.
    total = 0
    for name in platforms:
        print(f"\nScanning {name} for DM candidates...")
        total += scan_platform_http(config, name, opts.max, opts.dry_run, max_age_days=opts.days)

    if opts.dry_run:
        action = "found"
    else:
        action = "queued"
    print(f"\nDM scan complete: {total} candidates {action} across {', '.join(platforms)}")
    return total
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
if __name__ == "__main__":
    # Exit status 0 only when at least one candidate was found/queued;
    # an empty scan exits with status 1.
    count = main()
    exit_code = 1 if count <= 0 else 0
    sys.exit(exit_code)
|