@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,761 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Daily reply-risk digest.
|
|
3
|
+
|
|
4
|
+
Scans recent inbound replies to our social replies, keeps the model's
|
|
5
|
+
existing skip/status classification in view, adds surrounding DB context, and
|
|
6
|
+
emails a concise daily digest of risks, learnings, and drafting suggestions.
|
|
7
|
+
|
|
8
|
+
The core pass is deterministic and read-only:
|
|
9
|
+
- replies: inbound text, our follow-up, status, skip_reason, engagement
|
|
10
|
+
- parent replies: context for true depth-2+ replies under our reply
|
|
11
|
+
- posts/mentions: stored thread and notification context
|
|
12
|
+
- author_blocklist + author history: account-level risk context
|
|
13
|
+
|
|
14
|
+
By default the script asks Claude to summarize the compact JSON envelope into
|
|
15
|
+
a plain-text operator email. If Claude is unavailable, it falls back to a
|
|
16
|
+
deterministic report so the daily pipeline still produces something useful.
|
|
17
|
+
|
|
18
|
+
Usage:
|
|
19
|
+
python3 scripts/reply_risk_digest.py --dry-run --no-claude
|
|
20
|
+
python3 scripts/reply_risk_digest.py --hours 24
|
|
21
|
+
python3 scripts/reply_risk_digest.py --platform all --dry-run
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import argparse
|
|
27
|
+
import atexit
|
|
28
|
+
import base64
|
|
29
|
+
import html
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import re
|
|
33
|
+
import subprocess
|
|
34
|
+
import sys
|
|
35
|
+
import time
|
|
36
|
+
from collections import Counter, defaultdict
|
|
37
|
+
from datetime import datetime, timezone
|
|
38
|
+
from email.mime.text import MIMEText
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
from typing import Any
|
|
41
|
+
|
|
42
|
+
# Repository root, taken as two directory levels above this file.
REPO_DIR = Path(__file__).resolve().parents[1]
SCRIPT_DIR = REPO_DIR.joinpath("scripts")
# Put scripts/ first on the import path so the sibling helper modules
# imported below resolve from there.
sys.path.insert(0, str(SCRIPT_DIR))
|
|
45
|
+
|
|
46
|
+
from db import load_env # noqa: E402
|
|
47
|
+
from db_direct import get_conn # noqa: E402
|
|
48
|
+
|
|
49
|
+
# Wall-clock time at import; the exit-time run log reports elapsed seconds
# measured from this value.
RUN_STARTED = time.time()
# Value passed to log_run.py as --script.
SCRIPT_TAG = "reply-risk-digest"
# Address used as the From header of the digest email.
OPERATOR_EMAIL = "i@m13v.com"
# Gmail API user-credential file, resolved under the current user's home.
GMAIL_TOKEN_PATH = os.path.expanduser("~/gmail-api/token_i_at_m13v.com.json")
GMAIL_SCOPES = ["https://mail.google.com/"]
RUN_CLAUDE_PATH = REPO_DIR.joinpath("scripts", "run_claude.sh")
|
|
55
|
+
|
|
56
|
+
# (tag, compiled pattern, points) rules. classify_row() searches each pattern
# in the inbound reply text joined with the stored skip_reason and adds the
# points of every matching rule to the row's risk score.
RISK_RULES = [
    (
        "bot_callout",
        re.compile(
            r"\b(ai reply|ai bot|nice ai bot|bot account|are you a bot|"
            r"automated ai|automated reply|fully automated|llm write|"
            r"llm[- ]generated)\b",
            re.IGNORECASE,
        ),
        7,
    ),
    (
        "ai_slop",
        re.compile(
            r"\b(ai slop|slop reply|slop\b|clanker response|awful ai reply)\b",
            re.IGNORECASE,
        ),
        7,
    ),
    (
        "spam_callout",
        re.compile(
            r"\b(spam people|spam your|spamming|nobody needs your ai replies|"
            r"treat me like a human|blocked for ai reply|auto[- ]reply block)\b",
            re.IGNORECASE,
        ),
        8,
    ),
    (
        "hostile",
        re.compile(
            r"\b(fuck|fucking|bullshit|\bbs\b|scam|grift|trash|garbage|"
            r"red flags?|what the hell|did i ask|stop it|lying|mentir|"
            r"faux|fake)\b",
            re.IGNORECASE,
        ),
        5,
    ),
    (
        "bot_detection_bait",
        re.compile(
            r"\b(write me a poem|ignore previous|prompt injection|"
            r"banana|bananas)\b",
            re.IGNORECASE,
        ),
        6,
    ),
    (
        "product_takedown",
        re.compile(
            r"\b(red flags?|no usen este producto|inferior|"
            r"landing gen[eé]rica|page objects|propuesta de valor)\b",
            re.IGNORECASE,
        ),
        5,
    ),
]

# (marker, points) pairs. _skip_reason_risk_score() adds the points of every
# marker found as a case-insensitive substring of a row's skip_reason.
SKIP_RISK_HINTS = [
    ("hostile_user", 7),
    ("human_called_out_ai_reply", 9),
    ("ai_bot_callout", 9),
    ("user_explicitly_objected_to_ai_replies", 9),
    ("reply_hidden_flagged_as_ai", 9),
    ("llm_accusation_bait", 8),
    ("troll/bot-detection bait", 8),
    ("drive_by_mock", 6),
    ("templated_bot_reply", 6),
    ("author_blocked_us", 5),
    ("blocklist_added", 5),
    ("engagement_loop", 4),
    ("hostile_unsubstantive_rant", 4),
]

# Words in an inbound reply that classify_row() tags as a positive signal.
POSITIVE_RE = re.compile(
    r"\b(thanks|thank you|appreciate|agree|exactly|makes sense|fair point|"
    r"good point|smart|interesting|love|cool|great|true|correct)\b",
    re.IGNORECASE,
)
# Any question mark marks the inbound reply as a question.
QUESTION_RE = re.compile(r"\?")
# An http(s) URL in one of our replies.
LINK_RE = re.compile(r"https?://", re.IGNORECASE)
# The AI disclosure phrase in one of our replies.
AI_DISCLOSURE_RE = re.compile(r"\bwritten with (ai|s4lai)\b", re.IGNORECASE)
# Names matched in our replies to tag them as product-ish.
PRODUCT_RE = re.compile(
    r"\b(fazm|assrt|s4l|s4lai|podlog|claude-meter|runner|nightowl|cyrano|blurt)\b",
    re.IGNORECASE,
)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _emit_run_log() -> None:
    """Report this run to ``scripts/log_run.py`` on a best-effort basis.

    Invokes the logger with zero posted/skipped/failed/cost counters and the
    whole seconds elapsed since ``RUN_STARTED``. The child's exit status is
    ignored and its output is discarded.
    """
    elapsed = max(0, int(time.time() - RUN_STARTED))
    command = [
        # Reuse the interpreter running this script so the logger sees the
        # same environment; fall back to a PATH lookup when the interpreter
        # path is not available.
        sys.executable or "python3",
        str(REPO_DIR / "scripts" / "log_run.py"),
        "--script",
        SCRIPT_TAG,
        "--posted",
        "0",
        "--skipped",
        "0",
        "--failed",
        "0",
        "--cost",
        "0",
        "--elapsed",
        str(elapsed),
    ]
    try:
        subprocess.run(
            command,
            check=False,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Run logging is best effort: failing to launch the logger must not
        # turn interpreter shutdown into a traceback.
        pass
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Emit the run-log record when the interpreter exits.
atexit.register(_emit_run_log)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _clip(text: str | None, limit: int = 700) -> str:
    """Collapse *text* to a single line of at most *limit* characters.

    Runs of whitespace become single spaces and the ends are stripped. When
    the result is longer than *limit* it is cut to ``limit - 1`` characters,
    right-stripped, and terminated with an ellipsis character.

    Args:
        text: Value to clip; any falsy value yields ``""``. Non-string values
            are converted with ``str()``.
        limit: Maximum length of the returned string.

    Returns:
        The clipped single-line string, or ``""`` when *text* is falsy or
        *limit* is not positive.
    """
    if not text:
        return ""
    if limit <= 0:
        # A non-positive limit would make the slice below negative and return
        # almost the whole string instead of clipping it.
        return ""
    one_line = re.sub(r"\s+", " ", str(text)).strip()
    if len(one_line) <= limit:
        return one_line
    return one_line[: limit - 1].rstrip() + "…"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _as_iso(value: Any) -> str | None:
    """Return *value* as a string, preferring its ``isoformat()`` form.

    ``None`` is passed through unchanged. Objects exposing ``isoformat`` are
    rendered with it; anything else is converted with ``str()``.
    """
    if value is None:
        return None
    return value.isoformat() if hasattr(value, "isoformat") else str(value)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _gmail_service():
    """Build a Gmail API v1 client from the stored user credentials.

    Loads the credentials at ``GMAIL_TOKEN_PATH`` for ``GMAIL_SCOPES``. When
    they are expired and carry a refresh token, they are refreshed and the
    updated credentials are written back to the same file.
    """
    # The Google client libraries are imported inside the function, so they
    # are only needed when an email is actually sent.
    from google.auth.transport.requests import Request
    from google.oauth2.credentials import Credentials
    from googleapiclient.discovery import build

    credentials = Credentials.from_authorized_user_file(GMAIL_TOKEN_PATH, GMAIL_SCOPES)
    needs_refresh = credentials.expired and credentials.refresh_token
    if needs_refresh:
        credentials.refresh(Request())
        # NOTE(review): the write-back is taken to happen only after a refresh;
        # the flattened source does not show the nesting - confirm.
        with open(GMAIL_TOKEN_PATH, "w") as token_file:
            token_file.write(credentials.to_json())
    return build("gmail", "v1", credentials=credentials)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _send_email(to_addr: str, subject: str, body: str):
    """Send a plain-text UTF-8 email through the Gmail API.

    Args:
        to_addr: Recipient address for the ``to`` header.
        subject: Subject line.
        body: Plain-text message body.

    Returns:
        Whatever the Gmail ``users().messages().send(...).execute()`` call
        returns.
    """
    message = MIMEText(body, "plain", "utf-8")
    headers = (("to", to_addr), ("from", OPERATOR_EMAIL), ("subject", subject))
    for header_name, header_value in headers:
        message[header_name] = header_value
    # The API takes the whole RFC 822 message as URL-safe base64 text.
    encoded = base64.urlsafe_b64encode(message.as_bytes()).decode("utf-8")
    gmail_messages = _gmail_service().users().messages()
    send_request = gmail_messages.send(userId="me", body={"raw": encoded})
    return send_request.execute()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def fetch_reply_rows(db, platform: str, hours: int, limit: int) -> list[dict[str, Any]]:
    """Load recently discovered replies with their joined context.

    Each reply is left-joined to its post, its mention, and its parent reply.

    Args:
        db: Object whose ``execute(sql, params)`` returns a cursor offering
            ``fetchall()``; each fetched row must be convertible with ``dict()``.
        platform: Platform to restrict to, or ``"all"`` for every platform.
        hours: Look-back window on ``discovered_at``, in hours.
        limit: Maximum number of rows to return.

    Returns:
        One dict per reply, newest ``discovered_at`` first (ties by id,
        descending).
    """
    conditions = ["r.discovered_at >= NOW() - (%s * INTERVAL '1 hour')"]
    args: list[Any] = [int(hours)]
    if platform != "all":
        conditions.append("r.platform = %s")
        args.append(platform)
    # The LIMIT placeholder comes last in the statement, so its value is
    # appended after every WHERE parameter.
    args.append(int(limit))
    where_clause = " AND ".join(conditions)

    query = f"""
        SELECT
            r.id, r.platform, r.depth, r.parent_reply_id, r.post_id, r.mention_id,
            r.status, r.skip_reason, r.their_author, r.their_content,
            r.their_comment_url, r.our_reply_id, r.our_reply_content,
            r.our_reply_url, r.our_account, r.thread_author_handle,
            r.discovered_at, r.replied_at, r.processing_at, r.project_name,
            r.engagement_style, r.language, r.model, r.claude_session_id,
            r.is_recommendation, r.campaign_id, r.upvotes, r.comments_count,
            r.views, r.engagement_updated_at, r.autoposter_version,
            p.thread_url, p.thread_author, p.thread_author_handle AS post_thread_author_handle,
            p.thread_title, p.thread_content, p.thread_engagement,
            p.top_comment_author, p.top_comment_content, p.top_comment_url,
            p.our_content AS original_our_content, p.our_url AS original_our_url,
            p.project_name AS post_project_name, p.search_topic,
            m.mentioning_url, m.mentioning_handle, m.mentioning_text,
            m.parent_views, m.parent_likes, m.parent_retweets,
            pr.their_author AS parent_their_author,
            pr.their_content AS parent_their_content,
            pr.their_comment_url AS parent_their_comment_url,
            pr.our_reply_content AS parent_our_reply_content,
            pr.our_reply_url AS parent_our_reply_url,
            pr.status AS parent_status,
            pr.skip_reason AS parent_skip_reason,
            pr.engagement_style AS parent_engagement_style
        FROM replies r
        LEFT JOIN posts p ON p.id = r.post_id
        LEFT JOIN mentions m ON m.id = r.mention_id
        LEFT JOIN replies pr ON pr.id = r.parent_reply_id
        WHERE {where_clause}
        ORDER BY r.discovered_at DESC NULLS LAST, r.id DESC
        LIMIT %s
    """
    cursor = db.execute(query, args)
    return list(map(dict, cursor.fetchall()))
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def fetch_author_history(
    db, rows: list[dict[str, Any]], platform: str, days: int = 30
) -> dict[str, dict[str, Any]]:
    """Aggregate recent reply counters and blocklist entries per author.

    Args:
        db: Object whose ``execute(sql, params)`` returns a cursor offering
            ``fetchall()``; each fetched row must be convertible with ``dict()``.
        rows: Reply rows; only ``their_author`` is read, to pick the authors
            of interest.
        platform: Platform filter applied to both queries, or ``"all"``.
        days: Look-back window for the ``replies`` query. The per-author
            counter key stays ``"last_30d"`` whatever value is passed.

    Returns:
        Mapping of lower-cased author handle to a counters dict. A matching
        ``author_blocklist`` row adds a ``"blocklist"`` entry; if the
        blocklist lookup raises, every author gets ``"blocklist_error"``
        instead. Empty when no row has an author.
    """
    wanted = {
        author.lower()
        for author in (entry.get("their_author") for entry in rows)
        if author
    }
    if not wanted:
        return {}

    conditions = ["discovered_at >= NOW() - (%s * INTERVAL '1 day')"]
    args: list[Any] = [int(days)]
    if platform != "all":
        conditions.append("platform = %s")
        args.append(platform)
    where_clause = " AND ".join(conditions)

    cursor = db.execute(
        f"""
        SELECT platform, their_author, status, skip_reason, discovered_at,
               upvotes, comments_count, views
        FROM replies
        WHERE {where_clause}
        """,
        args,
    )

    history: dict[str, dict[str, Any]] = {}
    for author_handle in wanted:
        history[author_handle] = {
            "last_30d": 0,
            "replied": 0,
            "skipped": 0,
            "riskish_skips": 0,
            "child_replies": 0,
            "upvotes": 0,
            "views": 0,
            "last_seen_at": None,
        }

    # The query is not filtered by author, so rows for other authors are
    # dropped here.
    for record in map(dict, cursor.fetchall()):
        stats = history.get((record.get("their_author") or "").lower())
        if stats is None:
            continue
        stats["last_30d"] += 1
        status = record.get("status")
        if status == "replied":
            stats["replied"] += 1
        elif status == "skipped":
            stats["skipped"] += 1
        # NOTE(review): the flattened source does not show whether this check
        # sat inside the "skipped" branch; it is applied to every row here,
        # matching how classify_row scores skip_reason - confirm.
        if _skip_reason_risk_score(record.get("skip_reason") or "") >= 4:
            stats["riskish_skips"] += 1
        for counter, column in (
            ("child_replies", "comments_count"),
            ("upvotes", "upvotes"),
            ("views", "views"),
        ):
            stats[counter] += int(record.get(column) or 0)
        seen_at = _as_iso(record.get("discovered_at"))
        if seen_at and (not stats["last_seen_at"] or seen_at > stats["last_seen_at"]):
            stats["last_seen_at"] = seen_at

    block_sql = "SELECT platform, handle, classification, severity, reason, source_reply_id, created_at, updated_at, hit_count FROM author_blocklist"
    block_params = None
    if platform != "all":
        block_sql += " WHERE platform = %s"
        block_params = [platform]

    copied_fields = (
        "platform",
        "classification",
        "severity",
        "reason",
        "source_reply_id",
        "hit_count",
    )
    try:
        cursor = db.execute(block_sql, block_params)
        for entry in map(dict, cursor.fetchall()):
            blocked_handle = (entry.get("handle") or "").lower()
            if blocked_handle not in history:
                continue
            summary = {field: entry.get(field) for field in copied_fields}
            summary["created_at"] = _as_iso(entry.get("created_at"))
            summary["updated_at"] = _as_iso(entry.get("updated_at"))
            # A later blocklist row for the same handle replaces the earlier one.
            history[blocked_handle]["blocklist"] = summary
    except Exception as exc:
        # A failed blocklist lookup is reported on every author instead of
        # aborting the digest.
        for stats in history.values():
            stats["blocklist_error"] = str(exc)
    return history
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def _skip_reason_risk_score(skip_reason: str) -> int:
    """Sum the points of every ``SKIP_RISK_HINTS`` marker found in *skip_reason*.

    Matching is a case-insensitive substring test; an empty or ``None``
    reason scores 0.
    """
    lowered = (skip_reason or "").lower()
    return sum(
        points
        for marker, points in SKIP_RISK_HINTS
        if marker.lower() in lowered
    )
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def classify_row(row: dict[str, Any], author_history: dict[str, dict[str, Any]]):
    """Score one reply row for risk and insight and collect descriptive tags.

    Args:
        row: Reply row as a dict; missing or ``None`` fields are treated as
            empty text or zero.
        author_history: Per-author stats keyed by lower-cased handle.

    Returns:
        Dict with ``risk_score``, ``insight_score`` and ``tags`` (sorted,
        de-duplicated).
    """
    inbound_text = row.get("their_content") or ""
    skip_reason = row.get("skip_reason") or ""
    # The risk patterns are searched in the inbound text and the stored skip
    # reason together.
    searchable = " ".join([inbound_text, skip_reason])

    risk = _skip_reason_risk_score(skip_reason)
    insight = 0
    labels: set[str] = set()

    for label, pattern, weight in RISK_RULES:
        if pattern.search(searchable):
            labels.add(label)
            risk += weight

    if skip_reason and row.get("status") == "skipped":
        labels.add("model_skipped")
        risk += 1

    is_nested = row.get("parent_reply_id") is not None or int(row.get("depth") or 1) > 1
    labels.add("true_nested_followup" if is_nested else "notification_capture")

    our_reply = row.get("our_reply_content") or ""
    parent_reply = row.get("parent_our_reply_content") or ""
    if any(AI_DISCLOSURE_RE.search(text) for text in (our_reply, parent_reply)):
        labels.add("ai_disclosure_present")
        risk += 1
    for text, label in (
        (our_reply, "our_followup_productish"),
        (parent_reply, "trigger_parent_productish"),
    ):
        if LINK_RE.search(text) or PRODUCT_RE.search(text):
            labels.add(label)
            risk += 1

    if QUESTION_RE.search(inbound_text):
        labels.add("question")
        insight += 2
    if POSITIVE_RE.search(inbound_text):
        labels.add("positive_signal")
        insight += 1

    child_replies = int(row.get("comments_count") or 0)
    likes = int(row.get("upvotes") or 0)
    views = int(row.get("views") or 0)
    if child_replies:
        labels.add("our_followup_got_child_reply")
        insight += min(5, child_replies + 1)
    if likes:
        insight += min(3, likes)
    if views >= 50:
        insight += 1

    author_stats = author_history.get((row.get("their_author") or "").lower()) or {}
    blocklist_entry = author_stats.get("blocklist")
    if blocklist_entry:
        # Entries classified "velocity_auto" add 1 point; any other
        # classification adds 5.
        if blocklist_entry.get("classification") == "velocity_auto":
            labels.add("author_velocity_blocked")
            risk += 1
        else:
            labels.add("author_blocklisted")
            risk += 5

    riskish_skips = author_stats.get("riskish_skips", 0)
    if riskish_skips >= 2:
        labels.add("repeat_risk_author")
        risk += 2
    if author_stats.get("last_30d", 0) >= 4 and riskish_skips == 0:
        labels.add("repeat_constructive_author")
        insight += 1

    return {
        "risk_score": risk,
        "insight_score": insight,
        "tags": sorted(labels),
    }
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def compact_row(row: dict[str, Any], author_history: dict[str, dict[str, Any]]):
    """Project one reply row into the nested dict used by the digest envelope.

    Free-text fields are shortened with ``_clip`` and timestamps go through
    ``_as_iso``. The inbound reply's engagement counters are coerced to
    ``int``, with missing or falsy values becoming 0.

    Args:
        row: Reply row mapping. ``_classification`` is copied through as-is.
        author_history: Per-author history keyed by lower-cased handle.

    Returns:
        A dict grouping the row into ``inbound_reply``,
        ``our_followup_to_inbound``, ``stored_parent_context``,
        ``thread_context``, ``mention_context`` and ``context_gaps``,
        alongside a handful of top-level identifying fields.
    """
    field = row.get
    author = field("their_author")
    history_key = (author or "").lower()

    inbound_reply = {
        "text": _clip(field("their_content"), 900),
        "url": field("their_comment_url"),
        "views": int(field("views") or 0),
        "likes": int(field("upvotes") or 0),
        "child_replies": int(field("comments_count") or 0),
    }
    our_followup = {
        "text": _clip(field("our_reply_content"), 900),
        "url": field("our_reply_url"),
        "style": field("engagement_style"),
        "model": field("model"),
        "replied_at": _as_iso(field("replied_at")),
        "campaign_id": field("campaign_id"),
        "autoposter_version": field("autoposter_version"),
    }
    parent_context = {
        "parent_author": field("parent_their_author"),
        "parent_inbound_text": _clip(field("parent_their_content"), 600),
        "parent_inbound_url": field("parent_their_comment_url"),
        "parent_our_reply_text": _clip(field("parent_our_reply_content"), 900),
        "parent_our_reply_url": field("parent_our_reply_url"),
        "parent_status": field("parent_status"),
        "parent_skip_reason": field("parent_skip_reason"),
        "parent_style": field("parent_engagement_style"),
    }
    thread_context = {
        "thread_url": field("thread_url"),
        "thread_author": field("thread_author"),
        # Prefer the row's own handle column, then the one joined from the post.
        "thread_author_handle": (
            field("thread_author_handle") or field("post_thread_author_handle")
        ),
        "thread_title": _clip(field("thread_title"), 240),
        "thread_content": _clip(field("thread_content"), 800),
        "thread_engagement": field("thread_engagement"),
        "top_comment_author": field("top_comment_author"),
        "top_comment_content": _clip(field("top_comment_content"), 450),
        "top_comment_url": field("top_comment_url"),
        "original_our_content": _clip(field("original_our_content"), 700),
        "original_our_url": field("original_our_url"),
        "search_topic": field("search_topic"),
    }
    mention_context = {
        "mentioning_url": field("mentioning_url"),
        "mentioning_handle": field("mentioning_handle"),
        "mentioning_text": _clip(field("mentioning_text"), 500),
        "parent_views": field("parent_views"),
        "parent_likes": field("parent_likes"),
        "parent_retweets": field("parent_retweets"),
    }

    # Name the pieces of context that are absent so a reader of the digest
    # does not assume they exist.
    context_gaps = []
    if not field("post_id"):
        context_gaps.append("no_post_thread_context")
    if not field("parent_reply_id"):
        context_gaps.append("no_parent_reply_context")
    if field("status") == "skipped" and not field("our_reply_content"):
        context_gaps.append("no_our_followup_because_skipped")

    return {
        "id": field("id"),
        "platform": field("platform"),
        "discovered_at": _as_iso(field("discovered_at")),
        "depth": field("depth"),
        "parent_reply_id": field("parent_reply_id"),
        "status": field("status"),
        "skip_reason": field("skip_reason"),
        "classification": field("_classification"),
        "author": author,
        "author_history": author_history.get(history_key) or {},
        "inbound_reply": inbound_reply,
        "our_followup_to_inbound": our_followup,
        "stored_parent_context": parent_context,
        "thread_context": thread_context,
        "mention_context": mention_context,
        "context_gaps": context_gaps,
    }
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def build_envelope(rows: list[dict[str, Any]], author_history: dict[str, dict[str, Any]], args):
    """Classify every row and assemble the JSON-ready digest envelope.

    Each row is mutated in place: its ``_classification`` key is set to the
    result of ``classify_row``. Rows are then split into a risk list
    (risk score at or above ``args.min_risk_score``) and an insight list
    (insight score at or above ``args.min_insight_score`` and below the risk
    threshold), each sorted highest-first.

    Args:
        rows: Reply rows to classify and summarize.
        author_history: Per-author history keyed by handle.
        args: Object exposing ``hours``, ``platform``, ``min_risk_score``,
            ``min_insight_score``, ``risk_limit``, ``insight_limit`` and
            ``author_limit``.

    Returns:
        A dict with ``meta``, ``counts``, ``risk_items``, ``insight_items``
        and ``author_watchlist``.
    """
    for entry in rows:
        entry["_classification"] = classify_row(entry, author_history)

    risk_floor = args.min_risk_score
    insight_floor = args.min_insight_score

    def has_hard_block(history):
        # A blocklist entry counts unless it is the automatic velocity kind.
        listed = history.get("blocklist")
        return bool(listed) and listed.get("classification") != "velocity_auto"

    def risk_order(entry):
        return (
            entry["_classification"]["risk_score"],
            entry.get("comments_count") or 0,
            entry.get("views") or 0,
        )

    def insight_order(entry):
        return (
            entry["_classification"]["insight_score"],
            entry.get("comments_count") or 0,
            entry.get("upvotes") or 0,
            entry.get("views") or 0,
        )

    def watch_order(history):
        return (
            has_hard_block(history),
            history.get("riskish_skips", 0),
            history.get("last_30d", 0),
        )

    platform_counts = Counter(entry.get("platform") or "unknown" for entry in rows)
    status_counts = Counter(entry.get("status") or "unknown" for entry in rows)
    depth_counts = Counter(str(entry.get("depth") or 1) for entry in rows)
    tag_counts = Counter()
    for entry in rows:
        tag_counts.update((entry.get("_classification") or {}).get("tags", []))

    flagged = [
        entry for entry in rows if entry["_classification"]["risk_score"] >= risk_floor
    ]
    risk_rows = sorted(flagged, key=risk_order, reverse=True)

    constructive = [
        entry
        for entry in rows
        if entry["_classification"]["insight_score"] >= insight_floor
        and entry["_classification"]["risk_score"] < risk_floor
    ]
    insight_rows = sorted(constructive, key=insight_order, reverse=True)

    # Watchlist: authors with any risk-flavoured skip, heavy recent activity,
    # or a non-velocity blocklist entry.
    repeated_authors = [
        {"handle": handle, **history}
        for handle, history in author_history.items()
        if history.get("riskish_skips", 0)
        or history.get("last_30d", 0) >= 4
        or has_hard_block(history)
    ]
    repeated_authors.sort(key=watch_order, reverse=True)

    nested_total = 0
    for entry in rows:
        if entry.get("parent_reply_id") is not None or int(entry.get("depth") or 1) > 1:
            nested_total += 1

    meta = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "window_hours": args.hours,
        "platform": args.platform,
        "rows_scanned": len(rows),
        "risk_threshold": risk_floor,
        "insight_threshold": insight_floor,
        "note": (
            "notification_capture rows may lack the exact parent tweet that "
            "triggered the inbound reply; true_nested_followup rows include "
            "stored parent reply context via parent_reply_id."
        ),
    }
    counts = {
        "platforms": dict(platform_counts),
        "statuses": dict(status_counts),
        "depths": dict(depth_counts),
        "tags": dict(tag_counts.most_common()),
        "risk_items": len(risk_rows),
        "insight_items": len(insight_rows),
        "true_nested_followups": nested_total,
    }
    return {
        "meta": meta,
        "counts": counts,
        "risk_items": [
            compact_row(entry, author_history) for entry in risk_rows[: args.risk_limit]
        ],
        "insight_items": [
            compact_row(entry, author_history)
            for entry in insight_rows[: args.insight_limit]
        ],
        "author_watchlist": repeated_authors[: args.author_limit],
    }
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def build_prompt(envelope: dict[str, Any]) -> str:
    """Return the summarizer prompt: fixed instructions followed by the envelope JSON.

    Args:
        envelope: Digest envelope; serialized with ``indent=2`` and without
            ASCII escaping.

    Returns:
        The instruction text, the pretty-printed JSON, and a trailing newline.
    """
    instructions = """You are writing a daily operator email for Matt about replies TO our social replies.

Use ONLY the JSON context below. Do not invent thread context when the JSON says it is missing.
The `skip_reason` is valuable: it is the model's existing assessment after reading the reply.
Preserve row IDs and URLs for any concrete examples.

Write a concise plain-text email body with these sections:

1. Executive summary: 3-5 bullets with counts and the day's risk level.
2. Risk replies: the most important bot/spam/hostility/product-trust risks. Explain what triggered them.
3. Learnings: what worked, what patterns generated constructive replies, and what reply shapes should be avoided.
4. Suggested changes: concrete drafting/skip/feedback-loop suggestions.
5. Rows to inspect: 3-8 row IDs/URLs with one-line reasons.

Rules:
- Focus on "what was the thread / what was our reply / what did they reply / how did we classify it".
- If stored parent/thread context is missing, say so briefly; do not pretend we know it.
- Distinguish true nested follow-ups from notification-captured replies.
- Quote only short snippets, never long tweet bodies.
- Keep the email under about 900 words.

JSON context:
"""
    payload = json.dumps(envelope, ensure_ascii=False, indent=2)
    return instructions + payload + "\n"
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def summarize_with_claude(envelope: dict[str, Any], timeout: int) -> str | None:
    """Ask the Claude wrapper script for an email body describing ``envelope``.

    Every failure is reported on stderr and turned into ``None`` so the caller
    can fall back to the deterministic report.

    Args:
        envelope: Digest envelope passed to ``build_prompt``.
        timeout: Seconds to wait for the subprocess before giving up.

    Returns:
        The summary text with any leading ``Subject:`` line removed, or
        ``None`` when the wrapper is missing, times out, cannot be launched,
        exits non-zero, or yields no usable text.
    """
    if not RUN_CLAUDE_PATH.exists():
        return None

    command = [
        str(RUN_CLAUDE_PATH),
        SCRIPT_TAG,
        "--output-format",
        "json",
        "-p",
        build_prompt(envelope),
    ]
    try:
        proc = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=str(REPO_DIR),
        )
    except subprocess.TimeoutExpired:
        # The process did start; reporting "failed to start" here would send
        # the operator looking in the wrong place.
        print(
            f"[reply_risk_digest] Claude summarizer timed out after {timeout}s",
            file=sys.stderr,
        )
        return None
    except Exception as e:
        print(f"[reply_risk_digest] Claude summarizer failed to start: {e}", file=sys.stderr)
        return None

    if proc.returncode != 0:
        print(
            f"[reply_risk_digest] Claude summarizer exited {proc.returncode}: "
            f"{(proc.stderr or proc.stdout)[-1200:]}",
            file=sys.stderr,
        )
        return None

    raw = (proc.stdout or "").strip()
    try:
        data = json.loads(raw)
    except ValueError:
        # Output is not JSON at all: treat it as the plain-text body.
        return (_strip_embedded_subject(raw) or None) if raw else None

    # Valid JSON with an unexpected shape is machine output, not an email
    # body; return None so the caller uses the fallback report instead.
    if not isinstance(data, dict):
        return None
    result = data.get("result")
    if not isinstance(result, str):
        return None
    return _strip_embedded_subject(result.strip()) or None
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _strip_embedded_subject(text: str) -> str:
|
|
621
|
+
lines = text.splitlines()
|
|
622
|
+
if lines and lines[0].strip().lower().startswith("subject:"):
|
|
623
|
+
return "\n".join(lines[1:]).lstrip()
|
|
624
|
+
return text
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def fallback_report(envelope: dict[str, Any]) -> str:
    """Render the envelope as a plain-text report without calling a model.

    At most eight entries are listed for each of the risk items, insight
    items and author watchlist. The final line is a fixed
    ``Generated by ...`` marker.

    Args:
        envelope: Digest envelope with ``meta``, ``counts``, ``risk_items``,
            ``insight_items`` and ``author_watchlist``.

    Returns:
        The report as one newline-joined string.
    """
    meta = envelope["meta"]
    counts = envelope["counts"]
    risk_items = envelope["risk_items"]
    insight_items = envelope["insight_items"]

    out = [
        f"Reply Risk Digest ({meta['platform']}, last {meta['window_hours']}h)",
        "",
        "Executive summary",
        f"- Scanned {meta['rows_scanned']} replies.",
        f"- Statuses: {counts['statuses']}",
        f"- Risk items above threshold: {counts['risk_items']}. Insight items: {counts['insight_items']}.",
        f"- True nested follow-ups with stored parent context: {counts['true_nested_followups']}.",
        "",
        "Risk replies",
    ]

    if not risk_items:
        out.append("- No risk rows crossed the threshold.")
    for entry in risk_items[:8]:
        verdict = entry["classification"]
        inbound = entry["inbound_reply"]
        out.append(
            f"- #{entry['id']} @{entry['author']} score={verdict['risk_score']} "
            f"tags={','.join(verdict['tags'])}: "
            f"{_clip(inbound['text'], 180)}"
        )
        reason = entry.get("skip_reason")
        if reason:
            out.append(f" skip_reason: {_clip(reason, 220)}")
        out.append(f" url: {inbound['url']}")

    out += ["", "Learnings / constructive replies"]
    if not insight_items:
        out.append("- No insight rows crossed the threshold.")
    for entry in insight_items[:8]:
        verdict = entry["classification"]
        ours = entry["our_followup_to_inbound"]
        out.append(
            f"- #{entry['id']} @{entry['author']} score={verdict['insight_score']} "
            f"style={ours.get('style')}: {_clip(ours.get('text'), 200)}"
        )
        out.append(f" inbound: {_clip(entry['inbound_reply']['text'], 160)}")

    out += ["", "Author watchlist"]
    for watched in envelope["author_watchlist"][:8]:
        listed = watched.get("blocklist") or {}
        # Show the entry's classification when it has one, else yes/no.
        label = listed.get("classification") or ("yes" if listed else "no")
        out.append(
            f"- @{watched['handle']}: last_30d={watched.get('last_30d')} "
            f"riskish_skips={watched.get('riskish_skips')} "
            f"blocklist={label}"
        )

    out += ["", "Generated by scripts/reply_risk_digest.py"]
    return "\n".join(out)
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def build_subject(envelope: dict[str, Any]) -> str:
    """Compose the email subject line for a digest envelope.

    The severity label comes from the risk-item count: ``HIGH`` at 10 or
    more, ``WARN`` at 3 or more, otherwise ``OK``. The date is today's UTC
    date.
    """
    meta = envelope["meta"]
    risk_total = envelope["counts"]["risk_items"]
    scanned = meta["rows_scanned"]
    today = datetime.now(timezone.utc).date().isoformat()
    severity = "HIGH" if risk_total >= 10 else "WARN" if risk_total >= 3 else "OK"
    return (
        f"[reply-risk] {severity} {meta['platform']} {today} "
        f"({risk_total} risk / {scanned} scanned)"
    )
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def parse_args():
    """Parse the digest's command-line options from ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` of parsed options.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    option = parser.add_argument

    option("--platform", default="x", help="'x' by default, or 'all'")

    # Integer options that differ only in flag name and default.
    integer_options = (
        ("--hours", 24),
        ("--limit", 500),
        ("--risk-limit", 25),
        ("--insight-limit", 18),
        ("--author-limit", 12),
        ("--min-risk-score", 5),
        ("--min-insight-score", 3),
    )
    for flag, default in integer_options:
        option(flag, type=int, default=default)

    option("--to", default=None, help="Recipient; defaults to NOTIFICATION_EMAIL or i@m13v.com")
    option("--dry-run", action="store_true", help="Print email instead of sending")
    option("--no-claude", action="store_true", help="Use deterministic fallback report")
    option("--claude-timeout", type=int, default=420)
    option("--send-empty", action="store_true", help="Send even when no rows were scanned")
    option("--json-out", default=None, help="Write the compact JSON envelope to this path")

    return parser.parse_args()
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
def main():
    """Run the digest: fetch rows, build the envelope, then print or send the email.

    Steps, in order:
      1. Parse options, load the environment, and pick the recipient
         (``--to``, then ``NOTIFICATION_EMAIL``, then ``OPERATOR_EMAIL``).
      2. Normalize the platform name (``twitter`` becomes ``x``).
      3. Fetch reply rows and author history, always closing the connection.
      4. Build the envelope and optionally write it to ``--json-out``.
      5. Stop when no rows were found, unless ``--send-empty`` is set.
      6. Produce the body from the Claude summary or the fallback report.
      7. Print it with ``--dry-run``, otherwise send it.
    """
    args = parse_args()
    load_env()
    recipient = args.to or os.environ.get("NOTIFICATION_EMAIL") or OPERATOR_EMAIL

    requested = args.platform.lower().strip()
    platform = "x" if requested == "twitter" else requested
    args.platform = platform

    db = get_conn()
    try:
        rows = fetch_reply_rows(db, platform, args.hours, args.limit)
        author_history = fetch_author_history(db, rows, platform)
    finally:
        db.close()

    envelope = build_envelope(rows, author_history, args)

    if args.json_out:
        out_path = Path(args.json_out).expanduser()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(envelope, ensure_ascii=False, indent=2)
        out_path.write_text(serialized, encoding="utf-8")

    if not (rows or args.send_empty):
        print("[reply_risk_digest] no rows in window; no email sent")
        return

    body = None
    if not args.no_claude:
        body = summarize_with_claude(envelope, args.claude_timeout)
    if not body:
        body = fallback_report(envelope)

    subject = build_subject(envelope)

    script_path = html.escape(str(REPO_DIR / "scripts" / "reply_risk_digest.py"))
    footer = (
        "\n\n---\n"
        f"Generated by {script_path}\n"
        f"Window: last {args.hours}h, platform={args.platform}, scanned={len(rows)}\n"
    )
    # The fallback report already ends with this marker, so skip the footer.
    if "Generated by scripts/reply_risk_digest.py" not in body:
        body = body.rstrip() + footer

    if args.dry_run:
        for line in (f"To: {recipient}", f"Subject: {subject}", "", body):
            print(line)
        return

    result = _send_email(recipient, subject, body)
    print(f"[reply_risk_digest] sent to {recipient} id={result.get('id')} subject={subject!r}")
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
# Script entry point: run the digest only when this file is executed directly.
if __name__ == "__main__":
    main()
|