@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
score_twitter_candidates.py
|
|
4
|
+
|
|
5
|
+
Reads raw tweet data (JSON from stdin or file), calculates virality scores,
|
|
6
|
+
and upserts into the twitter_candidates table.
|
|
7
|
+
|
|
8
|
+
Also expires stale pending candidates by flipping status to 'expired'.
|
|
9
|
+
NO PRUNING: rows are kept forever for analytics (skip-reason audit, engagement
|
|
10
|
+
dynamics, project routing review). Per user instruction 2026-05-08, do not
|
|
11
|
+
re-introduce DELETE-by-age here under any retention window.
|
|
12
|
+
|
|
13
|
+
Can be called standalone or piped from the scanner:
|
|
14
|
+
echo '[{...}]' | python3 scripts/score_twitter_candidates.py
|
|
15
|
+
python3 scripts/score_twitter_candidates.py --file /tmp/tweets.json
|
|
16
|
+
python3 scripts/score_twitter_candidates.py --expire-only
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import math
|
|
22
|
+
import os
|
|
23
|
+
import re
|
|
24
|
+
import sys
|
|
25
|
+
from datetime import datetime, timezone, timedelta
|
|
26
|
+
|
|
27
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
28
|
+
from http_api import api_get, api_post # noqa: E402
|
|
29
|
+
|
|
30
|
+
# Best-effort dedicated logger for the follow-gate -> skill/logs/follow-gate.log.
|
|
31
|
+
# Guarded so a missing/older helper file can never break scoring (fail-open).
|
|
32
|
+
try:
|
|
33
|
+
import follow_gate_log as _fgl # noqa: E402
|
|
34
|
+
except Exception:
|
|
35
|
+
_fgl = None
|
|
36
|
+
from twitter_account import resolve_handle as _resolve_twitter_handle # noqa: E402
|
|
37
|
+
from project_topics import topics_for_project # noqa: E402
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Freshness window (in hours) for the expire-stale gate that flips stale
|
|
41
|
+
# pending rows to status='expired'. Sourced from the FRESHNESS_HOURS env the
|
|
42
|
+
# cycle exports (run-twitter-cycle.sh) so the expiry ceiling is configured in
|
|
43
|
+
# ONE place. Falls back to 18 when unset (e.g. ad-hoc / --expire-only runs) to
|
|
44
|
+
# preserve the historical default. NOTE: the gate is on discovered_at
|
|
45
|
+
# (discovery age), not tweet_posted_at; for logic D (≤1h discovery freshness)
|
|
46
|
+
# the two are within ~1h of each other.
|
|
47
|
+
EXPIRE_FRESHNESS_HOURS = int(os.environ.get("FRESHNESS_HOURS") or "18")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Real Twitter snowflake IDs are 18-19 digit numbers with full entropy in the
|
|
51
|
+
# low bits (sequence counter + worker/datacenter ID = bottom 22 bits ≈ bottom
|
|
52
|
+
# 7 decimal digits). An ID ending in 6+ zeros is statistically impossible
|
|
53
|
+
# unless the sequence counter, worker ID, and datacenter ID were all exactly 0
|
|
54
|
+
# at submission AND the timestamp aligned with a power-of-two ms boundary —
|
|
55
|
+
# combined probability ≈ 0. Observed 2026-05-16 (batch twcycle-20260516-080005):
|
|
56
|
+
# the harness scan model fabricates IDs by templating a high-digit prefix and
|
|
57
|
+
# zero-padding, e.g. 2055588000000000000, 2055590000000000000 (sequential by 1).
|
|
58
|
+
# fxtwitter rejects these at T1 ("truncated/invalid status ID and loads no
|
|
59
|
+
# tweet"). Drop them at score time so we don't burn draft tokens or candidate
|
|
60
|
+
# rows on phantom URLs.
|
|
61
|
+
_SNOWFLAKE_OK = re.compile(r"/status/(\d{15,19})(?:[/?#]|$)")
|
|
62
|
+
_TRAILING_ZEROS_FAKE = re.compile(r"0{6,}$")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Weight on the additive reach-potential term in calculate_virality_score
|
|
66
|
+
# (2026-05-28). Tunable. Larger = a fresh high-follower thread with no
|
|
67
|
+
# engagement yet ranks higher relative to threads with demonstrated velocity.
|
|
68
|
+
# At 0.6, a freshly-posted tweet from a 50k-200k account scores ~4 on reach
|
|
69
|
+
# alone; a 200M account ~5.4; a sub-1k account stays near 0. Set to 0 to fall
|
|
70
|
+
# back to the pure multiplicative (engagement-only) score.
|
|
71
|
+
REACH_POTENTIAL_WEIGHT = 0.6
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def looks_like_fabricated_tweet_url(url: str) -> bool:
    """Report whether a tweet URL carries a missing or templated status ID.

    The URL is judged fabricated (``True``) when any of these hold:
      - it is empty / falsy
      - ``_SNOWFLAKE_OK`` finds no ``/status/<digits>`` segment whose ID is
        15-19 digits long and is followed by ``/``, ``?``, ``#`` or the end
        of the string
      - the captured ID matches ``_TRAILING_ZEROS_FAKE`` (six or more
        trailing zeros, the zero-padding template signature)

    Any other URL is accepted and ``False`` is returned.
    """
    # A falsy URL is treated exactly like one with no parseable status ID.
    status_match = _SNOWFLAKE_OK.search(url) if url else None
    if status_match is None:
        return True

    snowflake_id = status_match.group(1)
    return _TRAILING_ZEROS_FAKE.search(snowflake_id) is not None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def calculate_virality_score(tweet):
    """
    Score a tweet's viral potential. Higher = better candidate to reply to.

    Keys read from ``tweet`` (a dict): likes, retweets, replies, bookmarks,
    author_followers (each defaults to 0) and age_hours (defaults to 1).
    A key that is present but None (e.g. a JSON null) is treated the same as
    a missing key instead of raising TypeError.

    Signals (from research + production tuning):
    1. Engagement velocity (eng/hour) - strongest predictor
    2. Retweet ratio: up to a 2x multiplier, reached once retweets are half
       of all engagement
    3. Reply count is weighted heavily (discussion = visibility for our reply)
    4. Reply-to-like ratio (discussion quality vs one-way broadcast)
    5. Author followers 5K+ sweet spot, big names not penalized
    6. Age penalty: exponential decay with 6h half-life
    7. Additive reach-potential term so the follower signal survives zero
       engagement

    Returns:
        A 3-tuple ``(score, velocity, rt_ratio)`` where score and velocity
        are rounded to 2 decimals and rt_ratio to 3 decimals.
    """

    def _metric(key, default=0):
        # Scraped payloads can carry an explicit null; fall back to the same
        # default a missing key would get so arithmetic below cannot crash.
        value = tweet.get(key, default)
        return default if value is None else value

    likes = _metric("likes")
    retweets = _metric("retweets")
    replies = _metric("replies")
    bookmarks = _metric("bookmarks")
    followers = _metric("author_followers")

    total_eng = likes + retweets + replies + bookmarks

    # Age in hours, floored at 0.1 so velocity never divides by ~0.
    age_hours = _metric("age_hours", 1)
    if age_hours < 0.1:
        age_hours = 0.1

    # 1. Engagement velocity (most important)
    velocity = total_eng / age_hours

    # 2. Retweet ratio (reshare intent)
    rt_ratio = retweets / total_eng if total_eng > 0 else 0

    # 3. Reply activity bonus (active discussion = more visibility for our reply)
    #    15 replies = +1x, 30 = +2x, 60+ = +4x cap
    reply_bonus = min(replies / 15, 4.0)

    # 4. Discussion quality (reply:like ratio). High ratio = real discussion.
    #    0.05 ratio = +0.5x, 0.1+ = +1.0x cap
    discussion_ratio = replies / likes if likes > 0 else 0
    discussion_bonus = min(discussion_ratio * 10, 1.0)

    # 5. Author reach multiplier
    #    Sweet spot: 5K+ followers. Big names (KentBeck-class) get full credit,
    #    since brand value outweighs the "too competitive" concern.
    if followers < 1000:
        reach_mult = 0.3
    elif followers < 5000:
        reach_mult = 0.6
    elif followers < 50000:
        reach_mult = 1.0
    elif followers < 200000:
        reach_mult = 1.4
    elif followers < 500000:
        reach_mult = 1.3
    else:
        reach_mult = 1.1  # mega accounts still worth it for brand exposure

    # 6. Age decay: half-life of 6 hours (softened from 3h)
    #    3h = 71%, 6h = 50%, 12h = 25%, 18h = 12.5%
    age_decay = math.exp(-0.1155 * age_hours)  # ln(2)/6

    # 7. Retweet ratio bonus
    rt_bonus = 1.0 + min(rt_ratio * 2, 1.0)  # up to 2x for high RT ratio

    # Engagement-driven score (multiplicative). This collapses to 0 for any
    # tweet with zero engagement, because velocity (= total_eng / age) gates the
    # entire product. That is correct for ranking *demonstrated* momentum.
    engagement_score = (
        velocity
        * reach_mult
        * age_decay
        * rt_bonus
        * (1 + reply_bonus)
        * (1 + discussion_bonus)
    )

    # Reach-potential term (ADDITIVE, 2026-05-28). The multiplicative score above
    # throws away the follower signal whenever engagement is 0: a freshly-posted
    # tweet from a 200M-follower account scored identically (0.0) to a 1-follower
    # nobody, because anything * 0 = 0. We ADD (not multiply) a reach term so the
    # follower signal survives a zero-engagement velocity.
    #
    # The term grows with log10(followers) inside each reach_mult tier, but it
    # is NOT strictly monotonic in followers: reach_mult steps down at 200k
    # (1.4 -> 1.3) and again at 500k (1.3 -> 1.1), so the term drops at those
    # two boundaries. It decays on the SAME 6h half-life via age_decay, so a
    # stale big-account tweet that STILL has no engagement sinks back toward
    # zero, while a fresh one ranks above a fresh nobody.
    #
    # max(followers, 1) keeps log10 >= 0, so this term is never negative: it
    # can only raise a score, never remove a candidate.
    reach_potential = (
        math.log10(max(followers, 1))
        * reach_mult
        * age_decay
        * REACH_POTENTIAL_WEIGHT
    )

    score = engagement_score + reach_potential

    return round(score, 2), round(velocity, 2), round(rt_ratio, 3)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def match_project(tweet_text, search_topic, config):
    """Match a tweet to the first project whose topics appear in it.

    Projects are taken from ``config["projects"]`` in order. For each one,
    the topics returned by ``topics_for_project(name)`` are lowercased and
    tested as substrings of the lowercased ``search_topic`` and of the
    lowercased ``tweet_text``; a hit in either is enough.

    Args:
        tweet_text: Tweet body; None is treated as an empty string.
        search_topic: Topic the search ran under; None is treated as empty.
        config: Dict that may hold a ``"projects"`` list of project dicts.

    Returns:
        The ``name`` of the first matching project, or None if nothing
        matches.
    """
    topic_lower = (search_topic or "").lower()
    text_lower = (tweet_text or "").lower()

    for proj in config.get("projects", []):
        name = proj.get("name", "")
        # Drop empty/None topics: "" is a substring of every string, so a
        # single blank entry would otherwise match every tweet to this
        # project, and None would crash on .lower().
        topics = [t.lower() for t in topics_for_project(name) if t]
        if any(t in topic_lower or t in text_lower for t in topics):
            return name

    return None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def upsert_candidates(tweets, config, batch_id=None, attempts_map=None, scored_sidecar=None):
|
|
201
|
+
"""Score and upsert tweet candidates into DB.
|
|
202
|
+
|
|
203
|
+
If batch_id is provided, also populates T0 engagement columns and tags
|
|
204
|
+
the row with batch_id so Phase 2 of the cycle can re-poll only this batch.
|
|
205
|
+
|
|
206
|
+
If attempts_map is provided (dict keyed by (query, project) -> attempt_id),
|
|
207
|
+
stamps twitter_candidates.search_attempt_id so dashboard per-query stats
|
|
208
|
+
can attribute each posted candidate to the exact discovering search,
|
|
209
|
+
rather than fanning out (batch_id, project_name) across every query the
|
|
210
|
+
batch ran for that project (2026-05-21 bug fix).
|
|
211
|
+
|
|
212
|
+
If scored_sidecar is provided, writes per-query verdict tallies to that
|
|
213
|
+
JSON path so run-twitter-cycle.sh can build the directional
|
|
214
|
+
TRIED_QUERIES_JSON for the next retry attempt's prompt (2026-05-28
|
|
215
|
+
retry-feedback loop). Shape:
|
|
216
|
+
{query_string: {raw, kept_after_age, kept_after_skip}, ...}
|
|
217
|
+
raw = tweets fed to upsert_candidates from the enrich step
|
|
218
|
+
kept_after_age = tweets surviving the FRESHNESS_HOURS_DISCOVER cap
|
|
219
|
+
kept_after_skip = tweets that made it through to api_post insert
|
|
220
|
+
Never raises if the path isn't writable; the verdict step falls back to
|
|
221
|
+
raw == kept_after_age (no all_aged_out distinction) when the sidecar is
|
|
222
|
+
missing.
|
|
223
|
+
|
|
224
|
+
Migrated 2026-05-18 to call the s4l.ai HTTP API:
|
|
225
|
+
- dedup probe -> GET /api/v1/posts/thread-urls?platform=twitter
|
|
226
|
+
- per-tweet upsert -> POST /api/v1/twitter-candidates
|
|
227
|
+
(route handles the ON CONFLICT + peer-cycle race guard server-side)
|
|
228
|
+
- freshness gate -> POST /api/v1/twitter-candidates/expire-stale
|
|
229
|
+
(default 18h window; never deletes rows — status flip only)
|
|
230
|
+
"""
|
|
231
|
+
attempts_map = attempts_map or {}
|
|
232
|
+
# Per-query tally for the scored sidecar. We seed `raw` upfront so a query
|
|
233
|
+
# whose every tweet was dropped (stale, fabricated, ceiling) still shows
|
|
234
|
+
# up with raw>0, kept_after_age=0 -> all_aged_out verdict instead of
|
|
235
|
+
# silently disappearing into the kept_after_skip=0 branch.
|
|
236
|
+
sidecar = {}
|
|
237
|
+
if scored_sidecar:
|
|
238
|
+
for _t in tweets:
|
|
239
|
+
_q = (_t.get("query") or "").strip()
|
|
240
|
+
if not _q:
|
|
241
|
+
continue
|
|
242
|
+
ent = sidecar.setdefault(_q, {"raw": 0, "kept_after_age": 0, "kept_after_skip": 0})
|
|
243
|
+
ent["raw"] += 1
|
|
244
|
+
# Get already-posted thread URLs for dedup. Scope per-account so the mk0r
|
|
245
|
+
# VM running as @matt_diak doesn't skip a tweet that @m13v_ posted on
|
|
246
|
+
# (or vice versa). Falls back to unscoped when the resolver can't pin a
|
|
247
|
+
# handle, which preserves the legacy single-machine behavior.
|
|
248
|
+
_twitter_handle = _resolve_twitter_handle()
|
|
249
|
+
_probe_query = {"platform": "twitter"}
|
|
250
|
+
if _twitter_handle:
|
|
251
|
+
_probe_query["our_account"] = _twitter_handle
|
|
252
|
+
posted_resp = api_get("/api/v1/posts/thread-urls", query=_probe_query)
|
|
253
|
+
posted = set((posted_resp.get("data") or {}).get("thread_urls") or [])
|
|
254
|
+
|
|
255
|
+
# Get already-SKIPPED (tweet_url, project) pairs for the per-project skip
|
|
256
|
+
# gate. Claude explicitly rejected these threads for the matched project in
|
|
257
|
+
# a prior cycle (status='skipped'); since the Phase 2b prompt now reserves
|
|
258
|
+
# 'rejected' for permanent, thread-intrinsic reasons (transient cap / dedup
|
|
259
|
+
# / cooldown deferrals are left pending, never skipped), every skipped row
|
|
260
|
+
# is a genuine rejection safe to suppress from future scans permanently.
|
|
261
|
+
# Per-project so a thread skipped as fazm stays eligible if a later scan
|
|
262
|
+
# matches it to a different project. Fail-open: ok_on_404 + try/except so a
|
|
263
|
+
# missing/unavailable endpoint behaves exactly like the pre-feature cycle
|
|
264
|
+
# (no skip filtering) instead of crashing Phase 1.
|
|
265
|
+
skipped_pairs = set()
|
|
266
|
+
if _twitter_handle:
|
|
267
|
+
try:
|
|
268
|
+
_skip_resp = api_get(
|
|
269
|
+
"/api/v1/twitter-candidates/skipped-urls",
|
|
270
|
+
query={"our_account": _twitter_handle},
|
|
271
|
+
ok_on_404=True,
|
|
272
|
+
)
|
|
273
|
+
if _skip_resp.get("_not_found"):
|
|
274
|
+
# 404: endpoint not deployed yet. Explicit so a 0-pair gate is
|
|
275
|
+
# never mistaken for "loaded the set, nothing matched".
|
|
276
|
+
print(
|
|
277
|
+
f"[skip_gate] fail-open: skipped-urls endpoint 404 "
|
|
278
|
+
f"(not deployed) our_account={_twitter_handle}; "
|
|
279
|
+
f"skip filter inactive this cycle",
|
|
280
|
+
file=sys.stderr,
|
|
281
|
+
flush=True,
|
|
282
|
+
)
|
|
283
|
+
else:
|
|
284
|
+
for _pair in (_skip_resp.get("data") or {}).get("pairs") or []:
|
|
285
|
+
_su = (_pair.get("tweet_url") or "").strip()
|
|
286
|
+
if _su:
|
|
287
|
+
skipped_pairs.add((_su, _pair.get("project")))
|
|
288
|
+
except SystemExit as _skip_err:
|
|
289
|
+
# http_api raises SystemExit on terminal HTTP failure (e.g. a 429
|
|
290
|
+
# rate-limit, which is a 4xx). Fail open: an empty set means the
|
|
291
|
+
# gate is inert this cycle rather than crashing Phase 1. Logged
|
|
292
|
+
# explicitly so an inert gate is distinguishable from a real
|
|
293
|
+
# no-match (both otherwise show "already rejected for project: 0").
|
|
294
|
+
skipped_pairs = set()
|
|
295
|
+
print(
|
|
296
|
+
f"[skip_gate] fail-open: skipped-urls fetch failed "
|
|
297
|
+
f"({_skip_err}); skip filter inactive this cycle",
|
|
298
|
+
file=sys.stderr,
|
|
299
|
+
flush=True,
|
|
300
|
+
)
|
|
301
|
+
# Always emit the loaded size so every cycle self-documents whether the
|
|
302
|
+
# gate had real data (N>0) or fell open (N=0). Pairs with N>0 is the
|
|
303
|
+
# positive proof that the check ran against the live skipped set.
|
|
304
|
+
print(
|
|
305
|
+
f"[skip_gate] loaded {len(skipped_pairs)} skipped (url,project) pairs "
|
|
306
|
+
f"for our_account={_twitter_handle or '(unresolved)'}",
|
|
307
|
+
file=sys.stderr,
|
|
308
|
+
flush=True,
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
# Skip threads whose author is someone we already follow. We don't need to
|
|
312
|
+
# win over accounts already in our network — the comment buys no new reach.
|
|
313
|
+
# The follow list is harvested out-of-band (scripts/harvest_twitter_following.py
|
|
314
|
+
# scrapes x.com/<handle>/following) and stored server-side; we just read the
|
|
315
|
+
# set here, scoped to our posting handle. Fail-open exactly like the skip gate
|
|
316
|
+
# above: a missing endpoint / 429 / unresolved handle leaves the set empty so
|
|
317
|
+
# the cycle behaves exactly as it did before this guardrail (2026-06-03).
|
|
318
|
+
followed_handles = set()
|
|
319
|
+
_follow_source = "unresolved"
|
|
320
|
+
if _twitter_handle:
|
|
321
|
+
try:
|
|
322
|
+
_foll_resp = api_get(
|
|
323
|
+
"/api/v1/followed-accounts",
|
|
324
|
+
query={"platform": "twitter", "our_account": _twitter_handle},
|
|
325
|
+
ok_on_404=True,
|
|
326
|
+
)
|
|
327
|
+
if _foll_resp.get("_not_found"):
|
|
328
|
+
_follow_source = "404"
|
|
329
|
+
print(
|
|
330
|
+
f"[follow_gate] fail-open: followed-accounts endpoint 404 "
|
|
331
|
+
f"(not deployed) our_account={_twitter_handle}; "
|
|
332
|
+
f"follow filter inactive this cycle",
|
|
333
|
+
file=sys.stderr,
|
|
334
|
+
flush=True,
|
|
335
|
+
)
|
|
336
|
+
else:
|
|
337
|
+
_follow_source = "ok"
|
|
338
|
+
for _fh in (_foll_resp.get("data") or {}).get("handles") or []:
|
|
339
|
+
_fhs = (_fh or "").strip().lstrip("@").lower()
|
|
340
|
+
if _fhs:
|
|
341
|
+
followed_handles.add(_fhs)
|
|
342
|
+
except SystemExit as _foll_err:
|
|
343
|
+
_follow_source = "error"
|
|
344
|
+
followed_handles = set()
|
|
345
|
+
print(
|
|
346
|
+
f"[follow_gate] fail-open: followed-accounts fetch failed "
|
|
347
|
+
f"({_foll_err}); follow filter inactive this cycle",
|
|
348
|
+
file=sys.stderr,
|
|
349
|
+
flush=True,
|
|
350
|
+
)
|
|
351
|
+
print(
|
|
352
|
+
f"[follow_gate] loaded {len(followed_handles)} followed handles "
|
|
353
|
+
f"for our_account={_twitter_handle or '(unresolved)'}",
|
|
354
|
+
file=sys.stderr,
|
|
355
|
+
flush=True,
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
inserted = updated = skipped = 0
|
|
359
|
+
skipped_fake_id = 0
|
|
360
|
+
skipped_already_rejected = 0
|
|
361
|
+
skipped_followed_author = 0
|
|
362
|
+
|
|
363
|
+
for tweet in tweets:
|
|
364
|
+
url = (tweet.get("tweet_url") or tweet.get("tweetUrl") or "").strip()
|
|
365
|
+
if not url:
|
|
366
|
+
continue
|
|
367
|
+
|
|
368
|
+
# Reject hallucinated snowflake IDs (see looks_like_fabricated_tweet_url
|
|
369
|
+
# docstring). Counted separately so the failure mode is visible in the
|
|
370
|
+
# pipeline log; rolled into `skipped` total for backwards-compat metrics.
|
|
371
|
+
if looks_like_fabricated_tweet_url(url):
|
|
372
|
+
skipped += 1
|
|
373
|
+
skipped_fake_id += 1
|
|
374
|
+
print(f" Drop fabricated snowflake: {url}", file=sys.stderr)
|
|
375
|
+
continue
|
|
376
|
+
|
|
377
|
+
# Skip if we already posted on this thread
|
|
378
|
+
if url in posted:
|
|
379
|
+
skipped += 1
|
|
380
|
+
continue
|
|
381
|
+
|
|
382
|
+
# Skip threads authored by someone we already follow (guardrail
|
|
383
|
+
# 2026-06-03). Same class as the posted-dedup above: an identity-based
|
|
384
|
+
# global skip, independent of project, so it lives here (before age math
|
|
385
|
+
# and scoring). followed_handles is the harvested set loaded once above
|
|
386
|
+
# (empty => gate inert, fail-open). enrich_twitter_candidates.py has
|
|
387
|
+
# already canonicalized tweet["handle"] to the author's screen_name.
|
|
388
|
+
_cand_handle = (tweet.get("handle") or "").strip().lstrip("@").lower()
|
|
389
|
+
if _cand_handle and _cand_handle in followed_handles:
|
|
390
|
+
skipped += 1
|
|
391
|
+
skipped_followed_author += 1
|
|
392
|
+
print(
|
|
393
|
+
f"[follow_gate] skip @{tweet.get('handle')} (followed) url={url}",
|
|
394
|
+
file=sys.stderr,
|
|
395
|
+
flush=True,
|
|
396
|
+
)
|
|
397
|
+
if _fgl:
|
|
398
|
+
_fgl.record_skip(_twitter_handle, _cand_handle, url, batch_id)
|
|
399
|
+
continue
|
|
400
|
+
|
|
401
|
+
# Calculate age
|
|
402
|
+
dt_str = tweet.get("datetime", "")
|
|
403
|
+
if dt_str:
|
|
404
|
+
try:
|
|
405
|
+
posted_at = datetime.fromisoformat(dt_str.replace("Z", "+00:00"))
|
|
406
|
+
age_hours = (datetime.now(timezone.utc) - posted_at).total_seconds() / 3600
|
|
407
|
+
except ValueError:
|
|
408
|
+
posted_at = None
|
|
409
|
+
age_hours = 24 # unknown age, penalize
|
|
410
|
+
else:
|
|
411
|
+
posted_at = None
|
|
412
|
+
age_hours = 24
|
|
413
|
+
|
|
414
|
+
tweet["age_hours"] = age_hours
|
|
415
|
+
tweet["author_followers"] = tweet.get("author_followers", 0)
|
|
416
|
+
|
|
417
|
+
# Hard age cutoff (2026-05-27): defense-in-depth against X's Latest tab
|
|
418
|
+
# silently degrading to "best available" results when our `since_time:`
|
|
419
|
+
# operator yields a sparse window. The pre-search hook
|
|
420
|
+
# (~/.claude/hooks/twitter-search-since-rewrite.py) injects
|
|
421
|
+
# `since_time:<now - FRESHNESS_HOURS_DISCOVER>` into every cycle query,
|
|
422
|
+
# and the harness scrape opens &f=live (Latest tab). In theory those
|
|
423
|
+
# two together cap age at the variant's freshness window. In practice
|
|
424
|
+
# x.com/search?f=live ignores `since_time:` on low-yield queries and
|
|
425
|
+
# falls back to whatever stale tweets it has. Without this cutoff,
|
|
426
|
+
# those stale rows land in twitter_candidates with virality ~0 (the
|
|
427
|
+
# 6h half-life decay floors them), survive into the post_twitter
|
|
428
|
+
# draft prompt, and get chosen when all candidates score near zero.
|
|
429
|
+
# We hard-drop here so they never reach the API, the draft prompt,
|
|
430
|
+
# or any per-row token spend. Reads the same env var the hook reads,
|
|
431
|
+
# so the cutoff matches the variant's window (1h for C/D, 6h for A/B).
|
|
432
|
+
# Falls back to 6h for non-cycle callers (legacy paths). Discovered
|
|
433
|
+
# 2026-05-27 after batches twcycle-20260527-134432 (Mediar) and
|
|
434
|
+
# twcycle-20260527-135430 (paperback-expert) posted under 49-77h-old
|
|
435
|
+
# threads that bypassed both layers.
|
|
436
|
+
try:
|
|
437
|
+
_freshness_cap = int(os.environ.get("FRESHNESS_HOURS_DISCOVER") or "6")
|
|
438
|
+
except ValueError:
|
|
439
|
+
_freshness_cap = 6
|
|
440
|
+
if age_hours > _freshness_cap:
|
|
441
|
+
skipped += 1
|
|
442
|
+
print(
|
|
443
|
+
f"[stale_age_skip] age_hours={age_hours:.1f} cap={_freshness_cap}h "
|
|
444
|
+
f"variant={os.environ.get('TWITTER_CYCLE_VARIANT') or ''} "
|
|
445
|
+
f"url={url}",
|
|
446
|
+
file=sys.stderr,
|
|
447
|
+
flush=True,
|
|
448
|
+
)
|
|
449
|
+
continue
|
|
450
|
+
|
|
451
|
+
# Tally kept_after_age for the verdict sidecar BEFORE the ceiling-D
|
|
452
|
+
# cap below. all_aged_out means "the freshness gate killed everything";
|
|
453
|
+
# ceiling-D is a quality filter that fires after age. Keeping the
|
|
454
|
+
# tallies separate prevents D-cycle queries from looking like aged-out
|
|
455
|
+
# to the next retry's drafter.
|
|
456
|
+
if scored_sidecar:
|
|
457
|
+
_q_age = (tweet.get("query") or "").strip()
|
|
458
|
+
if _q_age and _q_age in sidecar:
|
|
459
|
+
sidecar[_q_age]["kept_after_age"] += 1
|
|
460
|
+
|
|
461
|
+
# Variant D (2026-05-25): 2k-view ceiling cap on parent thread.
|
|
462
|
+
# Bucket analysis on 250+ mature posts showed view-share collapses
|
|
463
|
+
# from ~4% on 500-2k-view threads to ~0.1% on >10k-view threads —
|
|
464
|
+
# our reply is invisible to the audience of large threads. D drops
|
|
465
|
+
# any candidate whose T0 views exceed 2000; A/B/C let everything
|
|
466
|
+
# through unchanged. Comparing posted-quality (views/likes per
|
|
467
|
+
# surviving candidate) between D and C isolates the ceiling effect.
|
|
468
|
+
# No DB row written for rejects: the dashboard already groups by
|
|
469
|
+
# cycle_variant and the stderr marker below captures opportunity
|
|
470
|
+
# cost for later log-based analysis.
|
|
471
|
+
_ceiling_variant = os.environ.get("TWITTER_CYCLE_VARIANT") or ""
|
|
472
|
+
_ceiling_views = tweet.get("views", 0) or 0
|
|
473
|
+
if _ceiling_variant == "D" and _ceiling_views > 2000:
|
|
474
|
+
skipped += 1
|
|
475
|
+
print(
|
|
476
|
+
f"[ceiling_d_skip] views_t0={_ceiling_views} "
|
|
477
|
+
f"likes={tweet.get('likes', 0)} replies={tweet.get('replies', 0)} "
|
|
478
|
+
f"age_hours={age_hours:.2f} url={url}",
|
|
479
|
+
file=sys.stderr,
|
|
480
|
+
flush=True,
|
|
481
|
+
)
|
|
482
|
+
continue
|
|
483
|
+
|
|
484
|
+
score, velocity, rt_ratio = calculate_virality_score(tweet)
|
|
485
|
+
|
|
486
|
+
# Use LLM-assigned project if available, fall back to keyword matching
|
|
487
|
+
project = tweet.get("matched_project") or match_project(
|
|
488
|
+
tweet.get("text", ""),
|
|
489
|
+
tweet.get("search_topic", ""),
|
|
490
|
+
config,
|
|
491
|
+
)
|
|
492
|
+
|
|
493
|
+
# Skip threads Claude already explicitly rejected for THIS project
|
|
494
|
+
# (status='skipped'). Per-project: a thread skipped as fazm can still be
|
|
495
|
+
# picked if this scan matched it to a different project, so we key on
|
|
496
|
+
# (url, project) rather than url alone. Done here (not at the posted
|
|
497
|
+
# dedup above) because the project isn't resolved until this point.
|
|
498
|
+
if (url, project) in skipped_pairs:
|
|
499
|
+
skipped += 1
|
|
500
|
+
skipped_already_rejected += 1
|
|
501
|
+
print(
|
|
502
|
+
f" [skipped_already_rejected] {project}: {url}",
|
|
503
|
+
file=sys.stderr,
|
|
504
|
+
)
|
|
505
|
+
continue
|
|
506
|
+
|
|
507
|
+
body = {
|
|
508
|
+
"tweet_url": url,
|
|
509
|
+
"author_handle": tweet.get("handle", ""),
|
|
510
|
+
"author_followers": tweet.get("author_followers", 0),
|
|
511
|
+
"tweet_text": tweet.get("text", "") or "",
|
|
512
|
+
"tweet_posted_at": posted_at.isoformat() if posted_at else None,
|
|
513
|
+
"likes": tweet.get("likes", 0),
|
|
514
|
+
"retweets": tweet.get("retweets", 0),
|
|
515
|
+
"replies": tweet.get("replies", 0),
|
|
516
|
+
"views": tweet.get("views", 0),
|
|
517
|
+
"bookmarks": tweet.get("bookmarks", 0),
|
|
518
|
+
"engagement_velocity": velocity,
|
|
519
|
+
"retweet_ratio": rt_ratio,
|
|
520
|
+
"virality_score": score,
|
|
521
|
+
"search_topic": tweet.get("search_topic", ""),
|
|
522
|
+
"matched_project": project,
|
|
523
|
+
"batch_id": batch_id,
|
|
524
|
+
"discovery_batch_id": batch_id,
|
|
525
|
+
"cycle_variant": os.environ.get("TWITTER_CYCLE_VARIANT") or None,
|
|
526
|
+
# Stamp the machine's Twitter handle so the (tweet_url, our_account)
|
|
527
|
+
# composite unique gives each account its own candidate row.
|
|
528
|
+
# Without this, account A's 'posted' status on tweet X would lock
|
|
529
|
+
# account B out of the same tweet (ON CONFLICT preserved 'posted').
|
|
530
|
+
# Defaults server-side to 'm13v_' if omitted; new callers should
|
|
531
|
+
# always pass it explicitly.
|
|
532
|
+
"our_account": _twitter_handle or "",
|
|
533
|
+
# Repost provenance (2026-06-04). The scan derives the author from
|
|
534
|
+
# the status URL, so author_handle/tweet_url already point at the
|
|
535
|
+
# ORIGINAL tweet; is_repost flags that it surfaced via a repost and
|
|
536
|
+
# reposted_by names the account that reposted. Only sent when the
|
|
537
|
+
# scan evaluated it (presence-detected server-side).
|
|
538
|
+
"is_repost": bool(tweet.get("is_repost", False)),
|
|
539
|
+
"reposted_by": tweet.get("reposted_by", "") or "",
|
|
540
|
+
}
|
|
541
|
+
# Stamp the exact discovering search_attempt when the scanner gave us
|
|
542
|
+
# the literal query that surfaced this tweet AND the log script wrote
|
|
543
|
+
# an attempts map. Dashboard SQL prefers this column over the legacy
|
|
544
|
+
# (batch_id, project_name) fanout, which credits dud queries with
|
|
545
|
+
# posts they never surfaced.
|
|
546
|
+
_q = (tweet.get("query") or "").strip()
|
|
547
|
+
if _q and attempts_map:
|
|
548
|
+
attempt_id = attempts_map.get((_q, project))
|
|
549
|
+
if attempt_id is None:
|
|
550
|
+
attempt_id = attempts_map.get((_q, None))
|
|
551
|
+
if attempt_id is not None:
|
|
552
|
+
body["search_attempt_id"] = int(attempt_id)
|
|
553
|
+
# T0 columns only stamped when this row was discovered inside a cycle
|
|
554
|
+
# batch, mirroring the conditional in the original SQL.
|
|
555
|
+
if batch_id:
|
|
556
|
+
body["likes_t0"] = tweet.get("likes", 0)
|
|
557
|
+
body["retweets_t0"] = tweet.get("retweets", 0)
|
|
558
|
+
body["replies_t0"] = tweet.get("replies", 0)
|
|
559
|
+
body["views_t0"] = tweet.get("views", 0)
|
|
560
|
+
body["bookmarks_t0"] = tweet.get("bookmarks", 0)
|
|
561
|
+
|
|
562
|
+
try:
|
|
563
|
+
api_post("/api/v1/twitter-candidates", body)
|
|
564
|
+
inserted += 1
|
|
565
|
+
if scored_sidecar:
|
|
566
|
+
_q_kept = (tweet.get("query") or "").strip()
|
|
567
|
+
if _q_kept and _q_kept in sidecar:
|
|
568
|
+
sidecar[_q_kept]["kept_after_skip"] += 1
|
|
569
|
+
try:
|
|
570
|
+
_tweet_iso = body.get("tweet_posted_at") or body.get("tweet_created_at") or ""
|
|
571
|
+
_disc_iso = body.get("discovered_at") or body.get("created_at") or ""
|
|
572
|
+
_url = body.get("tweet_url") or body.get("url") or ""
|
|
573
|
+
_age_h = ""
|
|
574
|
+
if _tweet_iso and _disc_iso:
|
|
575
|
+
from datetime import datetime as _dt
|
|
576
|
+
try:
|
|
577
|
+
_t = _dt.fromisoformat(_tweet_iso.replace("Z", "+00:00"))
|
|
578
|
+
_d = _dt.fromisoformat(_disc_iso.replace("Z", "+00:00"))
|
|
579
|
+
_age_h = f"{(_d.timestamp() - _t.timestamp()) / 3600:.2f}"
|
|
580
|
+
except Exception:
|
|
581
|
+
_age_h = ""
|
|
582
|
+
print(
|
|
583
|
+
f"[twitter_discovery] batch_id={batch_id} "
|
|
584
|
+
f"discovery_batch_id={batch_id} "
|
|
585
|
+
f"cycle_variant={os.environ.get('TWITTER_CYCLE_VARIANT') or ''} "
|
|
586
|
+
f"tweet_age_hours={_age_h} discovered_at={_disc_iso} url={_url}",
|
|
587
|
+
file=sys.stderr,
|
|
588
|
+
flush=True,
|
|
589
|
+
)
|
|
590
|
+
except Exception:
|
|
591
|
+
pass
|
|
592
|
+
except SystemExit as e:
|
|
593
|
+
# http_api raises SystemExit on terminal failure. Keep iterating;
|
|
594
|
+
# the cycle should not die because one URL hit a 4xx validation
|
|
595
|
+
# edge case.
|
|
596
|
+
print(f" Error inserting {url}: {e}", file=sys.stderr)
|
|
597
|
+
continue
|
|
598
|
+
|
|
599
|
+
# Expire old pending candidates past the freshness window. This is a
|
|
600
|
+
# freshness GATE (status flip), not a delete — we keep the row forever
|
|
601
|
+
# for analytics.
|
|
602
|
+
api_post("/api/v1/twitter-candidates/expire-stale", {"freshness_hours": EXPIRE_FRESHNESS_HOURS})
|
|
603
|
+
|
|
604
|
+
# NO PRUNING. We keep every twitter_candidates row forever (chosen, skipped,
|
|
605
|
+
# expired) so we can audit project routing, skip reasons, growth dynamics,
|
|
606
|
+
# and engagement curves over time. Per user instruction (2026-05-08): never
|
|
607
|
+
# add DELETE-by-age back here, regardless of retention window.
|
|
608
|
+
|
|
609
|
+
if _fgl:
|
|
610
|
+
_fgl.record_cycle(_twitter_handle, len(followed_handles), _follow_source, len(tweets), skipped_followed_author, batch_id)
|
|
611
|
+
print(f"Scored: {inserted} upserted, {skipped} skipped (already posted or fabricated ID: {skipped_fake_id}, already rejected for project: {skipped_already_rejected}, followed authors: {skipped_followed_author})")
|
|
612
|
+
|
|
613
|
+
# Emit the verdict sidecar for the retry loop's directional feedback. Best
|
|
614
|
+
# effort: never fatal if the path is unwritable, never overwrites the
|
|
615
|
+
# cycle's other state.
|
|
616
|
+
if scored_sidecar:
|
|
617
|
+
try:
|
|
618
|
+
with open(scored_sidecar, "w") as fh:
|
|
619
|
+
json.dump(sidecar, fh)
|
|
620
|
+
print(
|
|
621
|
+
f"scored_sidecar: wrote {len(sidecar)} query verdicts -> {scored_sidecar}",
|
|
622
|
+
file=sys.stderr,
|
|
623
|
+
)
|
|
624
|
+
except OSError as e:
|
|
625
|
+
print(
|
|
626
|
+
f"scored_sidecar: could not write {scored_sidecar}: {e}",
|
|
627
|
+
file=sys.stderr,
|
|
628
|
+
)
|
|
629
|
+
|
|
630
|
+
return inserted
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def main():
    """Command-line entry point for scoring and upserting Twitter candidates.

    Modes:
      * ``--expire-only``: ask the API to expire stale pending candidates
        (status flip only), print the expired count, and return.
      * default: read tweets as JSON from ``--file`` (or stdin when omitted),
        optionally load the ``--attempts`` (query, project) -> attempt_id map,
        and hand everything to ``upsert_candidates``.

    The attempts map is best-effort: an unreadable or malformed file is
    logged to stderr and scoring continues with whatever entries were loaded
    before the failure (possibly none).

    Returns:
        None. The value returned by ``upsert_candidates`` is not propagated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file", help="Read tweets from JSON file instead of stdin")
    parser.add_argument(
        "--expire-only",
        action="store_true",
        help="Only expire stale pending rows (status flip; no row deletion)",
    )
    parser.add_argument(
        "--batch-id",
        help="Tag these candidates with a batch id and populate T0 columns",
    )
    parser.add_argument(
        "--attempts",
        help="Path to JSON list [{query, project, attempt_id}, ...] from "
             "log_twitter_search_attempts.py --attempts-out. When provided, "
             "stamps twitter_candidates.search_attempt_id per tweet so the "
             "dashboard can attribute posts to the exact discovering query.",
    )
    parser.add_argument(
        "--scored-sidecar",
        help="Path to write per-query verdict tallies for the retry loop "
             "feedback (2026-05-28). Shape: {query: {raw, kept_after_age, "
             "kept_after_skip}, ...}. Consumed by run-twitter-cycle.sh to "
             "build the directional TRIED_QUERIES_JSON for the next attempt's "
             "drafter prompt.",
    )
    args = parser.parse_args()

    # Optional user config; an absent file simply means an empty config.
    config_path = os.path.expanduser("~/social-autoposter/config.json")
    config = {}
    if os.path.exists(config_path):
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, encoding="utf-8") as f:
            config = json.load(f)

    if args.expire_only:
        # Freshness gate only. NO PRUNING — see note in upsert_candidates().
        # Server-side route runs the same UPDATE atomically; client just kicks
        # it off and prints the count.
        resp = api_post(
            "/api/v1/twitter-candidates/expire-stale",
            {"freshness_hours": EXPIRE_FRESHNESS_HOURS},
        )
        expired = (resp.get("data") or {}).get("expired_count", 0)
        print(f"Expired {expired} old pending candidates (no row deletion)")
        return

    if args.file:
        with open(args.file, encoding="utf-8") as f:
            tweets = json.load(f)
    else:
        tweets = json.load(sys.stdin)

    # Accept a single tweet object as well as a list of tweets.
    if not isinstance(tweets, list):
        tweets = [tweets]

    attempts_map = {}
    if args.attempts and os.path.exists(args.attempts):
        try:
            with open(args.attempts, encoding="utf-8") as f:
                rows = json.load(f)
            for r in rows or []:
                if not isinstance(r, dict):
                    continue
                q = (r.get("query") or "").strip()
                aid = r.get("attempt_id")
                if not q or aid is None:
                    continue
                # Normalise a missing/empty project to None so lookups can
                # fall back to the (query, None) key.
                proj = r.get("project") or None
                attempts_map[(q, proj)] = int(aid)
            print(
                f"score_twitter_candidates: loaded {len(attempts_map)} "
                f"(query, project) -> attempt_id entries from {args.attempts}",
                file=sys.stderr,
            )
        except (OSError, ValueError, TypeError, AttributeError) as e:
            # TypeError / AttributeError cover structurally valid JSON of the
            # wrong shape (top-level number, list-valued attempt_id,
            # non-string query). The map is an optional attribution aid, so a
            # bad file must not abort the scoring run.
            print(
                f"score_twitter_candidates: could not read attempts map "
                f"{args.attempts}: {e}",
                file=sys.stderr,
            )

    upsert_candidates(
        tweets,
        config,
        batch_id=args.batch_id,
        attempts_map=attempts_map,
        scored_sidecar=args.scored_sidecar,
    )
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
# Script entry point: run the CLI only when this file is executed directly,
# never as a side effect of being imported.
if __name__ == "__main__":
    main()
|