@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan Twitter notifications via the browser (no API cost) and insert new replies.
|
|
3
|
+
|
|
4
|
+
Browser-based replacement for the old API-powered scan_twitter_mentions.py.
|
|
5
|
+
Consumes JSON from `twitter_browser.py notifications [scroll] [tab]` which
|
|
6
|
+
defaults to the /notifications (All) tab so we catch nested replies where the
|
|
7
|
+
@-tag was dropped. Pass tab="mentions" to restrict to explicit @-mentions only.
|
|
8
|
+
Companion: scan_twitter_thread_followups.py revisits our recent replies to
|
|
9
|
+
pick up depth-2+ follow-ups that never surface in notifications at all.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python3 scripts/twitter_browser.py notifications 8 all > /tmp/twitter_notifs.json
|
|
13
|
+
python3 scripts/scan_twitter_mentions_browser.py --json-file /tmp/twitter_notifs.json
|
|
14
|
+
|
|
15
|
+
Migrated 2026-05-18: reads/writes go through s4l.ai HTTP API (/api/v1/posts,
|
|
16
|
+
/api/v1/posts/lookup, /api/v1/replies) via scripts/http_api.py instead of
|
|
17
|
+
psycopg2. Note: the route enforces (platform, their_comment_id) uniqueness
|
|
18
|
+
server-side, so the "existing_ids" prefetch is now a soft local cache used
|
|
19
|
+
to short-circuit the POST loop; we still rely on the API's ON CONFLICT path
|
|
20
|
+
as the source of truth.
|
|
21
|
+
|
|
22
|
+
Migrated 2026-05-23: third-party mentions now write to the dedicated
|
|
23
|
+
`mentions` table via /api/v1/mentions instead of a placeholder row in
|
|
24
|
+
`posts`. The associated reply row carries `mention_id` instead of
|
|
25
|
+
`post_id`, enforced by the replies_post_or_mention_exclusive_check DB
|
|
26
|
+
constraint. See migrations/2026-05-23-mentions-table.sql and
|
|
27
|
+
scripts/migrate_mentions_out_of_posts.py for the cutover history.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import argparse
|
|
31
|
+
import json
|
|
32
|
+
import os
|
|
33
|
+
import re
|
|
34
|
+
import sys
|
|
35
|
+
|
|
36
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
37
|
+
from http_api import api_get, api_post # noqa: E402
|
|
38
|
+
from project_topics import topics_for_project # noqa: E402
|
|
39
|
+
try:
|
|
40
|
+
from account_resolver import resolve as _resolve_account # noqa: E402
|
|
41
|
+
except Exception:
|
|
42
|
+
def _resolve_account(_platform): # type: ignore[unused-arg]
|
|
43
|
+
return None
|
|
44
|
+
|
|
45
|
+
# Optional on-disk config; load_config() returns {} when this file is absent.
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")
# Notifications whose text has fewer whitespace-separated words than this are
# skipped and counted as "too_short" by process_notifications().
MIN_WORDS = 3
# Handle of the account we scan as. Falsy when the resolver returned nothing
# (the import fallback above always returns None).
OUR_HANDLE = _resolve_account("twitter")
if not OUR_HANDLE:
    # No hardcoded fallback: scanning/attributing under a default handle silently
    # impersonates the repo owner. Refuse to run so the missing config surfaces.
    # NOTE: this runs at import time, so importing this module without a
    # configured handle terminates the process with exit status 1.
    sys.stderr.write(
        "[scan_twitter_mentions] no Twitter handle configured "
        "(accounts.twitter.handle / AUTOPOSTER_TWITTER_HANDLE); refusing to run "
        "to avoid wrong-account attribution. Run connect_x first.\n")
    sys.exit(1)

# Page size for the replies prefetch. 500 is the per-call cap inside
# /api/v1/replies. Only the first page is fetched today because the route has
# no id cursor to continue from.
REPLY_PAGE_LIMIT = 500
# Upper bound on pages for a paginated prefetch (200 * 500 = 100k rows of
# headroom); only the first page is fetched today.
REPLY_MAX_PAGES = 200  # 100k rows of headroom; plenty for the dedup cache.
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def load_config():
    """Load the optional JSON config stored at ``CONFIG_PATH``.

    Returns:
        The parsed JSON document, or an empty dict when the file does not
        exist, so callers can rely on their ``.get(..., default)`` fallbacks.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(CONFIG_PATH, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # A missing config is an expected state, not an error. Opening directly
        # (instead of exists-then-open) also avoids a check/use race.
        return {}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def word_count(text):
    """Count whitespace-separated tokens in *text*; falsy input counts as 0."""
    if not text:
        return 0
    tokens = text.split()
    return len(tokens)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def get_existing_reply_ids():
    """Return a recency window of stored ``their_comment_id`` values for platform ``x``.

    This is a soft local cache, not a complete index. Exactly one request is
    made to ``/api/v1/replies`` asking for up to ``REPLY_PAGE_LIMIT`` rows
    ordered by ``id``. The route has no "id <" cursor parameter, and ``since``
    filters on ``discovered_at``, so there is currently no way to walk to
    older pages. That is acceptable because the route enforces
    (platform, their_comment_id) uniqueness server-side: a notification that
    misses this cache is still rejected as a conflict at POST time rather than
    inserted twice.

    Returns:
        set of truthy ``their_comment_id`` values from the fetched page
        (empty when the response carries no rows).
    """
    query = {
        "platform": "x",
        "limit": REPLY_PAGE_LIMIT,
        "order_by": "id",
    }
    resp = api_get("/api/v1/replies", query=query)
    rows = (resp.get("data") or {}).get("replies") or []
    comment_ids = (r.get("their_comment_id") for r in rows)
    # Rows without a comment id carry nothing to dedup against.
    return {cid for cid in comment_ids if cid}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_our_posts():
    """Index our active twitter posts by the numeric id in their status URL.

    Fetches up to 500 active twitter posts and keys each row by the digits
    following ``/status/`` in its ``our_url``. Rows with no ``our_url``, or
    whose URL has no ``/status/<digits>`` part, are left out. When two rows
    share a tweet id the later one wins.
    """
    payload = api_get(
        "/api/v1/posts",
        query={"platform": "twitter", "status": "active", "limit": 500},
    )
    by_tweet_id = {}
    for post in (payload.get("data") or {}).get("posts") or []:
        # A missing/empty URL simply fails to match and is skipped.
        found = re.search(r"/status/(\d+)", post.get("our_url") or "")
        if found is not None:
            by_tweet_id[found.group(1)] = post
    return by_tweet_id
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def guess_project(text, config):
    """Pick the project a piece of text most likely refers to.

    Projects from ``config["projects"]`` are tried in order. The first one
    whose seed topic, or whose own name, occurs in *text* as a
    case-insensitive substring wins.

    Args:
        text: Tweet text to classify; ``None`` is treated as empty.
        config: Parsed config dict.

    Returns:
        The matching project name, else ``config["default_project"]``, else
        ``"General"``.
    """
    haystack = (text or "").lower()
    for project in config.get("projects") or []:
        name = project.get("name") or ""
        if not name:
            # "" is a substring of every string, so a nameless entry would
            # otherwise match all text and be returned as the project.
            continue
        # DB-backed seed list (post 2026-05-27 config.json removal).
        for topic in topics_for_project(name) or ():
            # Same empty-substring trap applies to blank topics.
            if topic and topic.lower() in haystack:
                return name
        if name.lower() in haystack:
            return name
    return config.get("default_project", "General")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def most_recent_active_project():
    """Return the ``project_name`` of the first active twitter post that has one.

    Serves as the project fallback for replies-to-us: the notification feed
    does not expose the parent tweet id, so the specific post being replied
    to cannot be identified, and a very short reply body is a poor basis for
    keyword matching.

    Up to 50 active twitter posts are requested and scanned in the order the
    API returns them. No ordering is requested here, so "most recent" relies
    on the route's default ordering (NOTE(review): confirm it is newest-first).
    The "(mention - no original post)" placeholder rows no longer live in
    ``posts`` since 2026-05-23, so no filtering for them is needed.

    Returns:
        The first truthy ``project_name``, or ``None`` when no row has one.
    """
    payload = api_get(
        "/api/v1/posts",
        query={"platform": "twitter", "status": "active", "limit": 50},
    )
    posts = (payload.get("data") or {}).get("posts") or []
    project_names = (post.get("project_name") for post in posts)
    return next((name for name in project_names if name), None)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def process_notifications(notifications, config):
    """Filter scraped notifications and record the new ones via the HTTP API.

    Each notification that survives the filters is posted to
    ``/api/v1/mentions``; a ``pending`` reply row carrying the resulting
    ``mention_id`` is then posted to ``/api/v1/replies``.

    Args:
        notifications: Iterable of dicts from the browser extractor. Keys read
            here: ``tweet_id``, ``handle``, ``text``, ``tweet_url``,
            ``replying_to``.
        config: Parsed config dict; ``exclusions.twitter_accounts`` lists
            handles to ignore and ``projects`` / ``default_project`` feed
            ``guess_project``.

    Returns:
        dict of counters: ``new``, ``already_tracked``, ``excluded_author``,
        ``own_account``, ``too_short``, ``no_tweet_id`` and ``failed`` (reply
        inserts the API rejected for a reason other than a duplicate).
    """
    our_handle = OUR_HANDLE.lower()  # hoisted: compared on every iteration

    exclusions = config.get("exclusions", {})
    excluded_accounts = {a.lower() for a in exclusions.get("twitter_accounts", [])}
    excluded_accounts.add(our_handle)

    existing_ids = get_existing_reply_ids()
    our_posts = get_our_posts()
    recent_project = most_recent_active_project()

    stats = {
        "new": 0,
        "already_tracked": 0,
        "excluded_author": 0,
        "own_account": 0,
        "too_short": 0,
        "no_tweet_id": 0,
        "failed": 0,
    }

    for n in notifications:
        tweet_id = n.get("tweet_id", "")
        handle = (n.get("handle") or "").lstrip("@")
        handle_lower = handle.lower()
        text = n.get("text") or ""
        # Prefer the scraped URL; otherwise rebuild it from handle + id.
        tweet_url = n.get("tweet_url") or (
            f"https://x.com/{handle}/status/{tweet_id}" if handle and tweet_id else ""
        )
        replying_to = (n.get("replying_to") or "").lstrip("@").lower()

        if not tweet_id:
            stats["no_tweet_id"] += 1
            continue

        if tweet_id in existing_ids:
            stats["already_tracked"] += 1
            continue

        if handle_lower in excluded_accounts:
            stats["own_account" if handle_lower == our_handle else "excluded_author"] += 1
            continue

        if word_count(text) < MIN_WORDS:
            stats["too_short"] += 1
            continue

        # Resolve project for the mention. Reply-to-us inherits the project
        # of our most recent active post (short reply text is unreliable for
        # keyword matching); other mentions fall back to keyword guess.
        # our_posts is only used as an "we have posts at all" signal here:
        # notifications don't expose conversation_id, so there is no direct
        # post_id linkage and attribution goes through the mentions table.
        is_reply_to_us = replying_to == our_handle and bool(our_posts)
        if is_reply_to_us and recent_project:
            project = recent_project
        else:
            project = guess_project(text, config)

        # Insert into /api/v1/mentions. Dedup on (platform, mentioning_url)
        # — if the row already exists we get back existing_mention_id from
        # the 409 body via ok_on_conflict.
        mention_body = {
            "platform": "twitter",
            "mentioning_url": tweet_url,
            "mentioning_handle": handle,
            "mentioning_text": text,
            "our_handle": OUR_HANDLE,
            "project": project,
            "status": "active",
        }
        mention_resp = api_post(
            "/api/v1/mentions", mention_body, ok_on_conflict=True,
        )
        mention_data = mention_resp.get("data") or {}
        mention_row = mention_data.get("mention") or {}
        mention_id = mention_row.get("id")
        if not mention_id and mention_resp.get("error"):
            # Conflict path: the id may sit directly in the error details or
            # inside a nested "mention" object.
            details = (mention_resp.get("error") or {}).get("details") or {}
            mention_id = details.get("existing_mention_id")
            if not mention_id:
                inner = details.get("mention") or {}
                mention_id = inner.get("id")
        if not mention_id:
            print(
                f" WARNING: could not resolve mention_id for {tweet_url!r}; skipping",
                file=sys.stderr,
            )
            continue

        reply_resp = api_post(
            "/api/v1/replies",
            {
                "mention_id": mention_id,
                "platform": "x",
                "their_comment_id": tweet_id,
                "their_author": handle,
                "their_content": text,
                "their_comment_url": tweet_url,
                "depth": 1,
                "status": "pending",
                "our_account": OUR_HANDLE,
            },
            ok_on_conflict=True,
        )
        reply_error = reply_resp.get("error") or {}
        if reply_error.get("code") == "duplicate_reply":
            # 409 means the row already existed under the server-side UNIQUE
            # (platform, their_comment_id) constraint; count it as
            # already_tracked rather than new so the summary matches reality.
            stats["already_tracked"] += 1
        elif reply_error:
            # Any other error body means nothing was stored. Do not report it
            # as NEW and do not cache the id, so a later run can retry it.
            stats["failed"] += 1
            print(
                f" WARNING: reply insert failed for {tweet_url!r}: {reply_error!r}",
                file=sys.stderr,
            )
            continue
        else:
            stats["new"] += 1
            print(f" NEW: @{handle}: {text[:80]}")
        existing_ids.add(tweet_id)

    return stats
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def main():
    """CLI entry point: ingest a notifications JSON file and print a summary.

    Reads the file named by ``--json-file``. The payload may be a dict with a
    ``notifications`` list or a bare list. A dict carrying a truthy ``error``
    key is reported on stderr and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description="Process Twitter notification data from browser scanner"
    )
    parser.add_argument(
        "--json-file",
        required=True,
        help="Path to JSON from twitter_browser.py notifications",
    )
    args = parser.parse_args()

    # Tweet text routinely contains emoji and other non-ASCII characters;
    # decode as UTF-8 explicitly instead of relying on the locale default.
    with open(args.json_file, encoding="utf-8") as f:
        data = json.load(f)

    if isinstance(data, dict) and data.get("error"):
        print(f"ERROR from extractor: {data['error']}", file=sys.stderr)
        sys.exit(1)

    notifications = data.get("notifications") if isinstance(data, dict) else data
    # A null / missing list means "nothing to process", not a crash in len().
    notifications = notifications or []
    print(f"Processing {len(notifications)} mentions...")

    config = load_config()
    stats = process_notifications(notifications, config)

    print(
        f"\nSummary: {stats['new']} new, "
        f"{stats['already_tracked']} already tracked, "
        f"{stats['excluded_author']} excluded, "
        f"{stats['own_account']} own account, "
        f"{stats['too_short']} too short, "
        f"{stats['no_tweet_id']} no tweet_id"
    )
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
if __name__ == "__main__":
|
|
327
|
+
main()
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan our recent X replies for new public follow-ups and ingest them.
|
|
3
|
+
|
|
4
|
+
Companion to scan_twitter_mentions_browser.py. The mentions tab only surfaces
|
|
5
|
+
explicit @-mentions, so replies to our replies without a retagged handle are
|
|
6
|
+
invisible. This script compensates by revisiting each of our recent X replies
|
|
7
|
+
and scraping the page for depth-2+ comments that aren't yet in the DB.
|
|
8
|
+
|
|
9
|
+
Flow:
|
|
10
|
+
1. Query `replies` for our X replies in last N days (default 14) where
|
|
11
|
+
`our_reply_url IS NOT NULL`. These are the threads we're subscribing to.
|
|
12
|
+
2. Write those URLs to a temp file.
|
|
13
|
+
3. Invoke `twitter_browser.py thread-followups <file>`, which scrapes each
|
|
14
|
+
URL and returns a `{results: [{thread_url, anchor_tweet_id, followups}]}`
|
|
15
|
+
JSON blob.
|
|
16
|
+
4. For each followup not already in `replies` (by platform+their_comment_id),
|
|
17
|
+
insert a new `replies` row with:
|
|
18
|
+
- platform = 'x'
|
|
19
|
+
- parent_reply_id = id of the original reply (the anchor)
|
|
20
|
+
- post_id = anchor.post_id
|
|
21
|
+
- depth = anchor.depth + 1
|
|
22
|
+
- status = 'pending'
|
|
23
|
+
Tweets we posted ourselves are skipped (OUR_HANDLE check). Own-account
|
|
24
|
+
replies from us get status='replied' with our_reply_id populated, mirroring
|
|
25
|
+
the mentions scanner.
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
python3 scripts/scan_twitter_thread_followups.py [--days N] [--max-urls N]
|
|
29
|
+
|
|
30
|
+
Migrated 2026-05-18: reads/writes now route through the s4l.ai HTTP API
|
|
31
|
+
(/api/v1/replies for both filter-list and insert) instead of psycopg2.
|
|
32
|
+
The (platform, their_comment_id) dedup runs server-side; the local
|
|
33
|
+
known_ids cache is now just for in-loop short-circuiting.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
import argparse
|
|
37
|
+
import json
|
|
38
|
+
import os
|
|
39
|
+
import re
|
|
40
|
+
import subprocess
|
|
41
|
+
import sys
|
|
42
|
+
import tempfile
|
|
43
|
+
from datetime import datetime, timedelta, timezone
|
|
44
|
+
|
|
45
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
46
|
+
from http_api import api_get, api_post # noqa: E402
|
|
47
|
+
# Account lookup: use the project resolver when it imports cleanly; otherwise
# fall back to a stub that reports "no handle" for every platform, which makes
# the guard below refuse to run.
try:
    from account_resolver import resolve as _resolve_account  # noqa: E402
except Exception:
    def _resolve_account(_platform):  # type: ignore[unused-arg]
        """Stub resolver used when account_resolver cannot be imported."""
        return None

# Filesystem locations and scan tunables.
REPO_DIR = os.path.expanduser("~/social-autoposter")
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")
DEFAULT_DAYS = 14
DEFAULT_MAX_URLS = 40
REPLY_PAGE_LIMIT = 500

OUR_HANDLE = _resolve_account("twitter")
if not OUR_HANDLE:
    # No hardcoded fallback: scanning/attributing under a default handle
    # silently impersonates the repo owner. Refuse to run so the missing
    # config surfaces.
    _missing_handle_msg = (
        "[scan_twitter_followups] no Twitter handle configured "
        "(accounts.twitter.handle / AUTOPOSTER_TWITTER_HANDLE); refusing to run "
        "to avoid wrong-account attribution. Run connect_x first.\n"
    )
    sys.stderr.write(_missing_handle_msg)
    raise SystemExit(1)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_config():
    """Load the autoposter JSON config from CONFIG_PATH.

    Returns:
        The parsed JSON value, or an empty dict when the config file does
        not exist.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
        OSError: for read failures other than a missing path.
    """
    try:
        # Open directly instead of checking os.path.exists() first: that
        # avoids the check-then-open race. JSON is UTF-8, so do not depend on
        # the platform's locale encoding.
        with open(CONFIG_PATH, encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Missing file (or missing parent path) means "no config".
        return {}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def fetch_our_recent_x_replies(days, max_urls):
    """Return (reply_id, our_reply_url, post_id, depth) tuples for our recent X replies.

    Requests /api/v1/replies with these query parameters:
      - platform = x
      - status = replied
      - since = ISO-8601 UTC timestamp for now minus `days` days
      - limit = max_urls
      - order_by = replied_at

    Rows without an `our_reply_url` are dropped client-side after the page
    comes back, and a missing/zero `depth` is treated as 1. At most
    `max_urls` tuples are returned.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    params = {
        "platform": "x",
        "status": "replied",
        "since": cutoff.isoformat(),
        "limit": max_urls,
        "order_by": "replied_at",
    }
    payload = api_get("/api/v1/replies", query=params)
    reply_rows = (payload.get("data") or {}).get("replies") or []
    anchors = [
        (
            row["id"],
            row.get("our_reply_url"),
            row.get("post_id"),
            int(row.get("depth") or 1),
        )
        for row in reply_rows
        if row.get("our_reply_url")
    ]
    return anchors[:max_urls]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def existing_comment_ids():
    """Return the set of `their_comment_id` values from one page of X replies.

    Fetches a single page of /api/v1/replies (platform=x, ordered by id,
    limit REPLY_PAGE_LIMIT) and collects every non-empty `their_comment_id`.

    The result is only a local cache: it lets the caller skip the POST for
    follow-ups it has already seen and report "already tracked" counts. Per
    the original author's note, the canonical dedup is the route's UNIQUE
    (platform, their_comment_id) index, so ids missing from this page are
    still rejected server-side.
    """
    payload = api_get(
        "/api/v1/replies",
        query={"platform": "x", "limit": REPLY_PAGE_LIMIT, "order_by": "id"},
    )
    seen_ids = set()
    for row in (payload.get("data") or {}).get("replies") or []:
        comment_id = row.get("their_comment_id")
        if comment_id:
            seen_ids.add(comment_id)
    return seen_ids
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def anchor_id_from_url(url):
    """Extract the numeric status id from a tweet permalink.

    Args:
        url: A URL containing a ``/status/<digits>`` segment. ``None`` and
            the empty string are accepted.

    Returns:
        The digits following the first ``/status/`` as a string, or ``None``
        when the URL has no such segment.
    """
    # [0-9] rather than \d: on str patterns \d also matches non-ASCII Unicode
    # decimal digits, which can never be part of a status id.
    m = re.search(r"/status/([0-9]+)", url or "")
    return m.group(1) if m else None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def run_browser_scrape(urls, scroll_count=3):
    """Shell out to twitter_browser.py thread-followups and parse its JSON.

    The URLs are written one per line to a temporary file whose path is
    passed to the scraper together with `scroll_count`. The temporary file is
    removed afterwards on every path.

    Args:
        urls: Iterable of thread URLs (strings) to revisit.
        scroll_count: Scrolls per thread page, forwarded to the scraper.

    Returns:
        The scraper's parsed JSON object on success. With no URLs, returns
        ``{"results": [], "urls_visited": 0}`` without running anything. On
        failure returns ``{"results": [], "error": <code>}`` where the code
        is ``browser_timeout``, ``browser_failed`` (non-zero exit) or
        ``json_parse_failed`` (stdout is not a JSON object).
    """
    if not urls:
        return {"results": [], "urls_visited": 0}

    timeout_s = 1800
    urls_path = None
    try:
        # Created inside the try so a failed write cannot leak the file.
        with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
            urls_path = f.name
            for u in urls:
                f.write(u + "\n")

        try:
            proc = subprocess.run(
                ["python3", os.path.join(REPO_DIR, "scripts/twitter_browser.py"),
                 "thread-followups", urls_path, str(scroll_count)],
                capture_output=True, text=True, timeout=timeout_s,
            )
        except subprocess.TimeoutExpired:
            # Report a hung scraper the same way as every other failure
            # instead of letting the exception escape to the caller.
            print(f"ERROR: twitter_browser.py timed out after {timeout_s}s", file=sys.stderr)
            return {"results": [], "error": "browser_timeout"}

        if proc.returncode != 0:
            print(f"ERROR: twitter_browser.py exited {proc.returncode}", file=sys.stderr)
            print(proc.stderr[-2000:], file=sys.stderr)
            return {"results": [], "error": "browser_failed"}

        try:
            parsed = json.loads(proc.stdout)
        except json.JSONDecodeError as e:
            print(f"ERROR: could not parse browser output as JSON: {e}", file=sys.stderr)
            print(proc.stdout[-2000:], file=sys.stderr)
            return {"results": [], "error": "json_parse_failed"}

        if not isinstance(parsed, dict):
            # Valid JSON but not an object (e.g. `null` or a list): the
            # caller reads it with .get(), so treat it as a parse failure.
            print("ERROR: browser output is not a JSON object", file=sys.stderr)
            print(proc.stdout[-2000:], file=sys.stderr)
            return {"results": [], "error": "json_parse_failed"}
        return parsed
    finally:
        if urls_path is not None:
            try:
                os.unlink(urls_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file is harmless.
                pass
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def insert_followup(followup, parent_reply_id, post_id, parent_depth, root_author=None):
    """Insert one follow-up row via POST /api/v1/replies.

    Args:
        followup: Scraped follow-up dict. Reads `tweet_id`, `handle`, `text`,
            `tweet_url` and, optionally, `media`.
        parent_reply_id: Id of our reply that this follow-up answers.
        post_id: Post id copied from the parent reply.
        parent_depth: Depth of the parent reply; a falsy value counts as 1.
        root_author: Handle of the thread's original poster, if known.

    Returns:
        True if the row was posted without a duplicate error. False if it was
        skipped: missing tweet id or handle, authored by our own account, or
        the API answered with error code ``duplicate_reply``.
    """
    tweet_id = followup.get("tweet_id") or ""
    handle = (followup.get("handle") or "").lstrip("@")
    if not tweet_id or not handle:
        return False
    # Compare bare, lower-cased handles on both sides. The scraped handle has
    # its "@" stripped, so a configured "@handle" would otherwise never match
    # and our own tweets would be ingested as follow-ups.
    if handle.lower() == OUR_HANDLE.lstrip("@").lower():
        return False

    body = {
        "post_id": post_id,
        "platform": "x",
        "their_comment_id": tweet_id,
        "their_author": handle,
        "their_content": followup.get("text") or "",
        "their_comment_url": followup.get("tweet_url") or "",
        "depth": (parent_depth or 1) + 1,
        "status": "pending",
        "parent_reply_id": parent_reply_id,
        "our_account": OUR_HANDLE,
    }

    # OP of the thread our reply lives in, scraped from the conversation
    # page. Always set when known, including when the OP is the replier —
    # that equality is the "OP replied" signal the analytic needs.
    root_author = (root_author or "").lstrip("@")
    if root_author:
        body["thread_author_handle"] = root_author

    # Media of the followup tweet itself (images/videos/GIFs/link-cards). An
    # empty list [] is meaningful ("captured, none found"), so it is sent;
    # the field is omitted only when the extractor produced no list at all.
    media = followup.get("media")
    if isinstance(media, list):
        body["their_media"] = media

    resp = api_post("/api/v1/replies", body, ok_on_conflict=True)
    is_duplicate = (resp.get("error") or {}).get("code") == "duplicate_reply"
    return not is_duplicate
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def main():
    """Revisit our recent X replies and ingest new follow-ups addressed to us.

    Steps: fetch our recent replies, scrape each reply URL through
    run_browser_scrape, then filter the scraped follow-ups (anchor tweet, own
    account, not replying to us, already tracked) and insert the remainder
    via insert_followup. With --dry-run nothing is written; candidates are
    only printed and counted.

    Returns:
        The number of new follow-ups ingested (or that would be ingested in a
        dry run), or -1 when the scrape failed and produced no results, so
        the entry point can exit non-zero.
    """
    parser = argparse.ArgumentParser(description=__doc__.splitlines()[0])
    parser.add_argument("--days", type=int, default=DEFAULT_DAYS,
                        help=f"Look back N days for our replies (default {DEFAULT_DAYS})")
    parser.add_argument("--max-urls", type=int, default=DEFAULT_MAX_URLS,
                        help=f"Max thread URLs to revisit per run (default {DEFAULT_MAX_URLS})")
    parser.add_argument("--scroll-count", type=int, default=3,
                        help="Scrolls per thread page (default 3)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print what would be inserted without writing")
    args = parser.parse_args()

    our_replies = fetch_our_recent_x_replies(args.days, args.max_urls)
    print(f"Revisiting {len(our_replies)} of our recent X replies (last {args.days}d)")
    if not our_replies:
        return 0

    url_to_meta = {url: (rid, pid, depth) for rid, url, pid, depth in our_replies}
    urls = list(url_to_meta)

    print(f"Invoking browser scraper for {len(urls)} URLs...")
    data = run_browser_scrape(urls, scroll_count=args.scroll_count)

    results = data.get("results") or []
    scrape_error = data.get("error")
    if scrape_error and not results:
        # Previously a failed scrape fell through to "0 new follow-ups" and
        # exit status 0, hiding the failure from whatever schedules this.
        print(f"ERROR: thread scrape failed ({scrape_error}); nothing ingested",
              file=sys.stderr)
        return -1

    known_ids = existing_comment_ids()
    # Bare, lower-cased handle, computed once: scraped handles have their "@"
    # stripped, so the configured one must be normalized the same way.
    our_handle = OUR_HANDLE.lstrip("@").lower()
    new_count = 0
    skip_own = 0
    skip_existing = 0
    skip_anchor = 0
    skip_not_replying_to_us = 0

    for r in results:
        thread_url = r.get("thread_url") or ""
        anchor_id = r.get("anchor_tweet_id") or anchor_id_from_url(thread_url)
        root_author = (r.get("root_author") or "").lstrip("@")
        meta = url_to_meta.get(thread_url)
        if not meta:
            continue
        parent_reply_id, post_id, parent_depth = meta
        child_depth = (parent_depth or 1) + 1

        for fu in r.get("followups") or []:
            tid = fu.get("tweet_id")
            handle = (fu.get("handle") or "").lstrip("@")
            if not tid:
                continue
            if tid == anchor_id:
                skip_anchor += 1
                continue
            if handle.lower() == our_handle:
                skip_own += 1
                continue
            # Filter: only keep tweets that are actually replying to us.
            # X tweet permalink pages inject "more from this author" / "you
            # might like" articles into the timeline. Without this check,
            # those leak in as fake follow-ups (observed 2026-05: ~80% of
            # captures were the seed author's later unrelated promotional
            # tweets, not replies to our reply). The extractor captures
            # `replying_to` from the "Replying to @handle" block above each
            # tweet; if it's empty or doesn't point at our handle, it's not
            # a response to us.
            replying_to = (fu.get("replying_to") or "").lstrip("@").lower()
            if replying_to != our_handle:
                skip_not_replying_to_us += 1
                continue
            if tid in known_ids:
                skip_existing += 1
                continue

            preview = (fu.get("text") or "")[:80]
            if args.dry_run:
                print(f"  [DRY] @{handle} (tid={tid}) op=@{root_author or '?'} parent_reply={parent_reply_id} depth={child_depth}: {preview}")
                new_count += 1
                known_ids.add(tid)
                continue

            inserted = insert_followup(fu, parent_reply_id, post_id, parent_depth,
                                       root_author=root_author)
            # Either way the id is now known: inserted by us, or a 409
            # duplicate (someone else inserted between our local cache and
            # this POST), which counts as already-tracked, not new.
            known_ids.add(tid)
            if inserted:
                new_count += 1
                print(f"  NEW follow-up: @{handle} (tid={tid}) parent_reply={parent_reply_id} depth={child_depth}: {preview}")
            else:
                skip_existing += 1

    print(f"\nSummary: {new_count} new follow-ups ingested, "
          f"{skip_existing} already tracked, {skip_own} own account, "
          f"{skip_anchor} anchor skips, {skip_not_replying_to_us} not replying to us")
    return new_count
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
if __name__ == "__main__":
    # main() returns a count; only a negative value maps to exit status 1.
    sys.exit(1 if main() < 0 else 0)
|