@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Append a summary line to the persistent run monitor log.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python3 scripts/log_run.py --script post_reddit --posted 5 --skipped 2 --failed 0 --cost 3.45 --elapsed 600
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import argparse
|
|
9
|
+
import os
|
|
10
|
+
import sys
|
|
11
|
+
from datetime import datetime, timedelta, timezone
|
|
12
|
+
|
|
13
|
+
# Location of the persistent run monitor log named in the module docstring;
# "~" is expanded to the invoking user's home directory at import time.
LOG_PATH = os.path.expanduser("~/social-autoposter/skill/logs/run_monitor.log")


# (script-name fragment, blocklist platform value) pairs consumed by
# _platform_from_script. That lookup tests each fragment with `in` against
# the lowercased script name, so a fragment matches anywhere in the name
# (e.g. "post_reddit", "engage_reddit"), not only at the start, and the
# first matching entry in list order wins.
# The blocklist table uses canonical 'x' for Twitter, and "github" maps to
# "github_issues"; the remaining platforms map to their own name.
_PLATFORM_MAP = [
    ("reddit", "reddit"),
    ("twitter", "x"),
    ("linkedin", "linkedin"),
    ("github", "github_issues"),
    ("instagram", "instagram"),
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _platform_from_script(script_name):
    """Return the blocklist platform value for ``script_name``, or None.

    The name is lowercased and the ``_PLATFORM_MAP`` entries are tried in
    table order; the first entry whose key occurs anywhere in the name
    decides the result. A falsy name (None, "") is treated as an empty
    string. None is returned when no entry matches.
    """
    lowered = script_name.lower() if script_name else ""
    return next(
        (platform for needle, platform in _PLATFORM_MAP if needle in lowered),
        None,
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _detect_escape_hatch(script_name, elapsed_seconds):
    """Query /api/v1/blocklist for LLM/manual escape-hatch firings during
    this run window, filtered by the script's platform.

    Velocity-auto rows are EXCLUDED — they fire programmatically via the
    route.ts SQL path on every reply and would flood the pill on discovery
    cycles. Only model-judgment / operator-judgment classifications
    (bot, engagement_loop, manual_block) are surfaced.

    Args:
        script_name: Name of the script that ran; mapped to a blocklist
            platform by ``_platform_from_script``. No lookup is made when
            it maps to no platform.
        elapsed_seconds: Run duration in seconds. The window examined is
            the last ``elapsed_seconds`` plus a 30-second margin. A falsy
            value (0, None, "") skips the lookup.

    Returns:
        ``(count, details_list)`` where ``details_list`` holds
        'handle:class' strings (commas and colons are removed from the
        handle so it cannot break that format) and ``count`` is its length.

    Fail-safe: any API error returns (0, []) so the run line still writes.
    """
    if not elapsed_seconds:
        return 0, []
    platform = _platform_from_script(script_name)
    if not platform:
        return 0, []

    surfaced = ("bot", "engagement_loop", "manual_block")
    try:
        # http_api lives next to this script; put that directory first on
        # sys.path so the import resolves regardless of the caller's cwd.
        sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
        from http_api import api_get  # noqa: E402

        cutoff = datetime.now(timezone.utc) - timedelta(
            seconds=float(elapsed_seconds) + 30
        )
        resp = api_get("/api/v1/blocklist", query={"platform": platform, "limit": 100})
        rows = ((resp or {}).get("data") or {}).get("blocklist") or []

        hits = []
        for row in rows:
            # One malformed row must not discard hits from the others.
            if not isinstance(row, dict):
                continue
            created_at = row.get("created_at")
            if not created_at:
                continue
            try:
                ts = datetime.fromisoformat(str(created_at).replace("Z", "+00:00"))
            except ValueError:
                continue
            if ts.tzinfo is None:
                # A timestamp without an offset parses as naive, and
                # comparing naive with the aware cutoff raises TypeError,
                # which previously wiped the whole result via the outer
                # handler. NOTE(review): treats such values as UTC —
                # confirm against the API's timestamp format.
                ts = ts.replace(tzinfo=timezone.utc)
            # Rows are ordered DESC by created_at, so once we cross the
            # cutoff every remaining row is older — short-circuit.
            if ts < cutoff:
                break
            classification = row.get("classification") or ""
            if classification not in surfaced:
                continue
            handle = str(row.get("handle") or "?").replace(",", "").replace(":", "")
            hits.append(f"{handle}:{classification}")
        return len(hits), hits
    except Exception:
        # Deliberate best-effort: a missing http_api module, a network
        # failure or an unexpected payload must never block the log line.
        return 0, []
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def main():
|
|
82
|
+
parser = argparse.ArgumentParser(description="Log a run summary line")
|
|
83
|
+
parser.add_argument("--script", required=True, help="Script name (e.g. post_reddit, engage_reddit)")
|
|
84
|
+
parser.add_argument("--posted", type=int, default=0, help="Number of successful posts")
|
|
85
|
+
parser.add_argument("--skipped", type=int, default=0, help="Number of skipped items")
|
|
86
|
+
parser.add_argument("--failed", type=int, default=0, help="Number of failures")
|
|
87
|
+
parser.add_argument("--cost", type=float, default=0.0, help="Total cost in USD")
|
|
88
|
+
parser.add_argument("--elapsed", type=float, default=0.0, help="Elapsed time in seconds")
|
|
89
|
+
parser.add_argument("--model", default="", help="Dominant Claude model id used in the run (optional)")
|
|
90
|
+
parser.add_argument("--replies-refreshed", type=int, default=0,
|
|
91
|
+
help="Number of per-reply stat rows refreshed in this run "
|
|
92
|
+
"(stats_*, engage_github). Surfaces as a separate pill "
|
|
93
|
+
"in the dashboard Jobs table.")
|
|
94
|
+
parser.add_argument("--checked", type=int, default=0,
|
|
95
|
+
help="Stats jobs only: rows the run actually hit the "
|
|
96
|
+
"platform API for (Reddit JSON, fxtwitter, LinkedIn "
|
|
97
|
+
"feed scrape, etc.). Excludes skipped-as-fresh and "
|
|
98
|
+
"skipped-as-stable. Renders as 'checked' pill.")
|
|
99
|
+
parser.add_argument("--updated", type=int, default=0,
|
|
100
|
+
help="Stats jobs only: legacy/back-compat field. Pre-2026-05-18 "
|
|
101
|
+
"this was 'rows where any tracked metric moved' but it "
|
|
102
|
+
"silently summed in Step 1 view-scrape counts too. Use "
|
|
103
|
+
"`--changed` for the new clean semantics; keep `--updated` "
|
|
104
|
+
"wired only for old log lines.")
|
|
105
|
+
parser.add_argument("--removed", type=int, default=0,
|
|
106
|
+
help="Stats jobs only: posts newly flagged deleted/removed in this run. "
|
|
107
|
+
"Renders as 'removed'.")
|
|
108
|
+
# 2026-05-18 stats-pill relabel pass. The legacy `updated` field conflated
|
|
109
|
+
# two distinct things (Step 1 view scrape count + Step 2 detail-leg
|
|
110
|
+
# changed count) which made "updated" balloon meaninglessly. The new
|
|
111
|
+
# split lets the dashboard show four clean pills:
|
|
112
|
+
# scanned -> total rows considered this run (= polled + skipped)
|
|
113
|
+
# checked -> rows we actually hit the platform API for
|
|
114
|
+
# changed -> subset of checked where any tracked metric moved
|
|
115
|
+
# views-refreshed -> rows where the cheap view-scrape leg wrote a value
|
|
116
|
+
# All four are optional, additive to the existing stats_segment, and
|
|
117
|
+
# default to 0 so existing callers don't have to change.
|
|
118
|
+
parser.add_argument("--scanned", type=int, default=0,
|
|
119
|
+
help="Stats jobs only: TOTAL rows considered this run "
|
|
120
|
+
"(polled + skipped + bypassed-as-fresh). "
|
|
121
|
+
"Renders as a 'scanned' pill on stats rows.")
|
|
122
|
+
parser.add_argument("--changed", type=int, default=0,
|
|
123
|
+
help="Stats jobs only: subset of `checked` where any "
|
|
124
|
+
"tracked metric actually moved. Renders as a "
|
|
125
|
+
"'changed' pill. Distinct from `--updated` which "
|
|
126
|
+
"stays for back-compat with the old field name.")
|
|
127
|
+
parser.add_argument("--views-refreshed", dest="views_refreshed", type=int, default=0,
|
|
128
|
+
help="Stats jobs only: rows where the cheap view-scrape "
|
|
129
|
+
"leg (Step 1 profile scrape on Reddit; built-in on "
|
|
130
|
+
"Twitter) wrote a fresh view count. Distinct from "
|
|
131
|
+
"`--changed`, which is the per-row JSON-API leg.")
|
|
132
|
+
parser.add_argument("--unavailable", type=int, default=0,
|
|
133
|
+
help="Stats jobs (LinkedIn): posts where the platform "
|
|
134
|
+
"explicitly returned a 'post unavailable' string. "
|
|
135
|
+
"Subset of removed; rendered as a separate pill.")
|
|
136
|
+
parser.add_argument("--not-found", dest="not_found", type=int, default=0,
|
|
137
|
+
help="Stats jobs (LinkedIn): posts still active but our "
|
|
138
|
+
"comment couldn't be located. Renders as 'not_found'.")
|
|
139
|
+
parser.add_argument("--salvaged", type=int, default=0,
|
|
140
|
+
help="Twitter cycle: number of pending candidates from "
|
|
141
|
+
"prior cycles re-assigned to this batch in Phase 0. "
|
|
142
|
+
"Surfaces as a 'salvaged' pill in the dashboard "
|
|
143
|
+
"Result column so an operator can tell that work "
|
|
144
|
+
"from a previously-failed cycle is being retried "
|
|
145
|
+
"rather than lost. Optional; 0 = omit segment.")
|
|
146
|
+
# ---- Discovery-stage counters (Twitter cycle, mirrors LinkedIn) -----
|
|
147
|
+
# Twitter wires the same shape LinkedIn already exposes
|
|
148
|
+
# (queries / candidates_found / dropped_below_floor) so the dashboard can
|
|
149
|
+
# render a single 'discover' tooltip across platforms. Each is an
|
|
150
|
+
# independent integer flag; pass 0 / omit to skip the segment. Stay
|
|
151
|
+
# backward-compatible: emitted as `key=N` after `salvaged` and before
|
|
152
|
+
# the cost/elapsed pair so older log lines (no discovery info) still
|
|
153
|
+
# parse via the existing positional regex in bin/server.js.
|
|
154
|
+
parser.add_argument("--queries", type=int, default=0,
|
|
155
|
+
help="Discovery: number of search queries the cycle "
|
|
156
|
+
"actually ran (raw count, including duds). "
|
|
157
|
+
"Twitter Phase 1 / LinkedIn Phase A.")
|
|
158
|
+
parser.add_argument("--duds", type=int, default=0,
|
|
159
|
+
help="Discovery: subset of --queries that returned "
|
|
160
|
+
"zero candidates. Used by the dashboard to show "
|
|
161
|
+
"query-quality drift over time.")
|
|
162
|
+
parser.add_argument("--tweets-pulled", dest="tweets_pulled", type=int, default=0,
|
|
163
|
+
help="Discovery: raw tweets/posts the scraper pulled "
|
|
164
|
+
"before the floor filter. Twitter only — "
|
|
165
|
+
"LinkedIn doesn't have a directly comparable "
|
|
166
|
+
"raw-volume number.")
|
|
167
|
+
parser.add_argument("--candidates", type=int, default=0,
|
|
168
|
+
help="Discovery: candidates that survived the "
|
|
169
|
+
"post-floor filter (Twitter T0 snapshot rows / "
|
|
170
|
+
"LinkedIn candidates_found). Includes salvaged "
|
|
171
|
+
"rows from prior cycles.")
|
|
172
|
+
parser.add_argument("--above-floor", dest="above_floor", type=int, default=0,
|
|
173
|
+
help="Discovery: candidates that cleared the "
|
|
174
|
+
"review-cap floor — for Twitter this is "
|
|
175
|
+
"Δ≥10 momentum (the same signal that flips "
|
|
176
|
+
"POST_LIMIT 1→3); for LinkedIn this is the "
|
|
177
|
+
"post-virality-floor count. Smaller than "
|
|
178
|
+
"--candidates.")
|
|
179
|
+
parser.add_argument("--failure-reasons", dest="failure_reasons", default="",
|
|
180
|
+
help="Optional comma-separated `reason:count` pairs "
|
|
181
|
+
"describing why a run reported failed>0 "
|
|
182
|
+
"(e.g. 'monthly_limit:5,timeout:1'). Surfaced in "
|
|
183
|
+
"the dashboard Result column so operators can "
|
|
184
|
+
"tell a hard cap from a transient error without "
|
|
185
|
+
"opening the log file. Reason keys are free-form "
|
|
186
|
+
"snake_case; the dashboard sorts by count desc "
|
|
187
|
+
"and shows the top one with the rest in tooltip.")
|
|
188
|
+
parser.add_argument("--skip-reasons", dest="skip_reasons", default="",
|
|
189
|
+
help="Optional comma-separated `reason:count` pairs "
|
|
190
|
+
"describing why a run reported skipped>0 "
|
|
191
|
+
"(e.g. 'duplicate_thread_pre_post:3,empty_reply_text:1'). "
|
|
192
|
+
"Distinct from --failure-reasons: skips are "
|
|
193
|
+
"intentional (dedup race guards, empty drafts, "
|
|
194
|
+
"rate-limited threads) and the dashboard renders "
|
|
195
|
+
"them as a yellow 'skipped: <reason>' pill rather "
|
|
196
|
+
"than the red 'failed: <reason>' pill. Same "
|
|
197
|
+
"sanitization rules as --failure-reasons.")
|
|
198
|
+
# Inbox/feed scan counters (engage-reddit, engage-twitter, etc.). Lets a
|
|
199
|
+
# pipeline that scans an inbox before engaging surface scan-stage
|
|
200
|
+
# granularity (seen / new / excluded / unmatched) in the dashboard Result
|
|
201
|
+
# column, so an empty cycle reads as "scanned 100 / 0 new" instead of just
|
|
202
|
+
# "0 0 0 0". Comma-separated `key=N` pairs; whitespace and the pipe char are
|
|
203
|
+
# stripped. Empty = omit the segment entirely (preserves backward compat).
|
|
204
|
+
parser.add_argument("--scan", dest="scan", default="",
|
|
205
|
+
help="Optional comma-separated `key=N` pairs from an "
|
|
206
|
+
"inbox/feed scan stage (e.g. "
|
|
207
|
+
"'seen=100,new=0,excluded=1,unmatched=0'). "
|
|
208
|
+
"Surfaces as scan-stage pills in the dashboard "
|
|
209
|
+
"Result column for engage runs.")
|
|
210
|
+
# Invent-topics hourly job counters. Carries the project picked, how many
|
|
211
|
+
# topics were invented, how many queries were drafted in total, how many
|
|
212
|
+
# queries surfaced ANY supply, and the per-topic query counts. Free-form
|
|
213
|
+
# key=value comma-separated string — keys with integer values are parsed
|
|
214
|
+
# as ints in the dashboard, `project` is parsed as a string, `qpt` is
|
|
215
|
+
# parsed as a `+`-separated int list. Example:
|
|
216
|
+
# --invent='project=fazm,topics=3,queries=15,queries_w_supply=1,qpt=5+5+5'
|
|
217
|
+
# Tails after scan= so the bin/server.js positional regex extends
|
|
218
|
+
# backward-compatibly (the new group is optional).
|
|
219
|
+
parser.add_argument("--invent", dest="invent", default="",
|
|
220
|
+
help="Invent-topics job stats. Comma-separated "
|
|
221
|
+
"key=value pairs (e.g. 'project=fazm,topics=3,"
|
|
222
|
+
"queries=15,queries_w_supply=1,qpt=5+5+5'). "
|
|
223
|
+
"Surfaces as the result-column pills on the "
|
|
224
|
+
"Invent Topics rows in the Status > Job History "
|
|
225
|
+
"tab.")
|
|
226
|
+
args = parser.parse_args()
|
|
227
|
+
|
|
228
|
+
timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
|
|
229
|
+
model_suffix = f" model={args.model}" if args.model else ""
|
|
230
|
+
# Inserted between failed=N and cost= so the existing positional regex in
|
|
231
|
+
# bin/server.js still parses old lines (the segment is optional in the regex).
|
|
232
|
+
replies_segment = (
|
|
233
|
+
f" replies_refreshed={args.replies_refreshed}"
|
|
234
|
+
if args.replies_refreshed else ""
|
|
235
|
+
)
|
|
236
|
+
# Stats-job per-run counters. The base segment (checked/updated/removed)
|
|
237
|
+
# stays as a single optional capture group for the bin/server.js regex.
|
|
238
|
+
# The LinkedIn-specific extras (unavailable/not_found) tail the base
|
|
239
|
+
# segment as their own optional groups so older lines still parse.
|
|
240
|
+
# 2026-05-18 relabel: scanned/changed/views_refreshed tail the segment as
|
|
241
|
+
# their own optional groups. Old log lines without them still parse.
|
|
242
|
+
# Trigger the segment if ANY stats-job field is set so the new fields
|
|
243
|
+
# surface even when the legacy three are zero.
|
|
244
|
+
_any_stats = (args.checked or args.updated or args.removed
|
|
245
|
+
or args.unavailable or args.not_found
|
|
246
|
+
or args.scanned or args.changed or args.views_refreshed)
|
|
247
|
+
stats_segment = (
|
|
248
|
+
f" checked={args.checked} updated={args.updated} removed={args.removed}"
|
|
249
|
+
if _any_stats else ""
|
|
250
|
+
)
|
|
251
|
+
if args.unavailable:
|
|
252
|
+
stats_segment += f" unavailable={args.unavailable}"
|
|
253
|
+
if args.not_found:
|
|
254
|
+
stats_segment += f" not_found={args.not_found}"
|
|
255
|
+
if args.scanned:
|
|
256
|
+
stats_segment += f" scanned={args.scanned}"
|
|
257
|
+
if args.changed:
|
|
258
|
+
stats_segment += f" changed={args.changed}"
|
|
259
|
+
if args.views_refreshed:
|
|
260
|
+
stats_segment += f" views_refreshed={args.views_refreshed}"
|
|
261
|
+
# `salvaged=N` segment tails the stats segment as its own optional capture
|
|
262
|
+
# so old log lines (no salvage info) still parse cleanly. Twitter-cycle
|
|
263
|
+
# specific today, but any pipeline that retries pending work cross-cycle
|
|
264
|
+
# can emit it.
|
|
265
|
+
salvaged_segment = f" salvaged={args.salvaged}" if args.salvaged else ""
|
|
266
|
+
# `discover` segment carries Phase-1/discovery counters the dashboard
|
|
267
|
+
# surfaces as a tooltip on the Result column (queries / duds /
|
|
268
|
+
# tweets_pulled / candidates / above_floor). Each sub-key is only
|
|
269
|
+
# emitted when non-zero so old log lines without discovery info still
|
|
270
|
+
# parse via the existing positional regex. The whole segment is opt-in:
|
|
271
|
+
# if every counter is zero, no `discover=` token appears at all.
|
|
272
|
+
discover_parts = []
|
|
273
|
+
if args.queries:
|
|
274
|
+
discover_parts.append(f"queries={args.queries}")
|
|
275
|
+
if args.duds:
|
|
276
|
+
discover_parts.append(f"duds={args.duds}")
|
|
277
|
+
if args.tweets_pulled:
|
|
278
|
+
discover_parts.append(f"tweets_pulled={args.tweets_pulled}")
|
|
279
|
+
if args.candidates:
|
|
280
|
+
discover_parts.append(f"candidates={args.candidates}")
|
|
281
|
+
if args.above_floor:
|
|
282
|
+
discover_parts.append(f"above_floor={args.above_floor}")
|
|
283
|
+
discover_segment = (
|
|
284
|
+
" discover=" + ",".join(discover_parts) if discover_parts else ""
|
|
285
|
+
)
|
|
286
|
+
# `failure_reasons` segment is appended after elapsed (and after the
|
|
287
|
+
# optional model suffix) so the existing positional regex in bin/server.js
|
|
288
|
+
# still parses old lines. Sanitize: strip whitespace and forbid the pipe
|
|
289
|
+
# char so the value can't break out of the log line column. Empty string
|
|
290
|
+
# = omit the segment entirely (preserves backward compat).
|
|
291
|
+
fr_raw = (args.failure_reasons or "").strip()
|
|
292
|
+
fr_clean = fr_raw.replace("|", "").replace(" ", "")
|
|
293
|
+
failure_segment = f" failure_reasons={fr_clean}" if fr_clean else ""
|
|
294
|
+
# `skip_reasons=` segment is the skip-side companion to failure_reasons.
|
|
295
|
+
# Tails after failure_reasons so the existing positional regex stays
|
|
296
|
+
# back-compat: old log lines that ended at failure_reasons (or earlier)
|
|
297
|
+
# still parse, the new group is optional. Same sanitization rules.
|
|
298
|
+
sr_raw = (args.skip_reasons or "").strip()
|
|
299
|
+
sr_clean = sr_raw.replace("|", "").replace(" ", "")
|
|
300
|
+
skip_segment = f" skip_reasons={sr_clean}" if sr_clean else ""
|
|
301
|
+
# `scan=` segment carries inbox/feed scan-stage counters. Same sanitization
|
|
302
|
+
# rules as failure_reasons (strip whitespace + pipe). Appended after
|
|
303
|
+
# discover= so the existing positional regex in bin/server.js can extend
|
|
304
|
+
# without breaking back-compat on old lines.
|
|
305
|
+
scan_raw = (args.scan or "").strip()
|
|
306
|
+
scan_clean = scan_raw.replace("|", "").replace(" ", "")
|
|
307
|
+
scan_segment = f" scan={scan_clean}" if scan_clean else ""
|
|
308
|
+
# `invent=` segment carries the invent-topics hourly job's per-run counts.
|
|
309
|
+
# Same sanitization rules as scan=/failure_reasons (strip whitespace + pipe).
|
|
310
|
+
# Tails after scan= so the bin/server.js positional regex extends
|
|
311
|
+
# backward-compatibly — old lines with no invent= still parse.
|
|
312
|
+
invent_raw = (args.invent or "").strip()
|
|
313
|
+
invent_clean = invent_raw.replace("|", "").replace(" ", "")
|
|
314
|
+
invent_segment = f" invent={invent_clean}" if invent_clean else ""
|
|
315
|
+
# `escape_hatch=` segment surfaces author_blocklist writes that happened
|
|
316
|
+
# during this run window (LLM-judgment via reply_db.py CLI, or manual
|
|
317
|
+
# operator adds). Auto-detected via the API so callers don't have to
|
|
318
|
+
# plumb it; fail-safe to empty on any error. Velocity-auto rows are
|
|
319
|
+
# excluded inside _detect_escape_hatch — they fire on every reply and
|
|
320
|
+
# would flood the pill. Tails after skip_reasons so old log lines (no
|
|
321
|
+
# escape-hatch info) still parse via the bin/server.js positional regex.
|
|
322
|
+
eh_count, eh_details = _detect_escape_hatch(args.script, args.elapsed)
|
|
323
|
+
if eh_count:
|
|
324
|
+
eh_details_clean = ",".join(eh_details).replace("|", "").replace(" ", "")
|
|
325
|
+
escape_hatch_segment = (
|
|
326
|
+
f" escape_hatch={eh_count} escape_hatch_details={eh_details_clean}"
|
|
327
|
+
)
|
|
328
|
+
else:
|
|
329
|
+
escape_hatch_segment = ""
|
|
330
|
+
line = (
|
|
331
|
+
f"{timestamp} | {args.script} | "
|
|
332
|
+
f"posted={args.posted} skipped={args.skipped} failed={args.failed}"
|
|
333
|
+
f"{replies_segment}{stats_segment}{salvaged_segment}{discover_segment}{scan_segment}{invent_segment} "
|
|
334
|
+
f"cost=${args.cost:.2f} elapsed={args.elapsed:.0f}s{model_suffix}{failure_segment}{skip_segment}{escape_hatch_segment}"
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)
|
|
338
|
+
with open(LOG_PATH, "a") as f:
|
|
339
|
+
f.write(line + "\n")
|
|
340
|
+
|
|
341
|
+
print(line)
|
|
342
|
+
|
|
343
|
+
# Silent-failure warning fires when a posting job claims `failed>0` but
|
|
344
|
+
# never posted anything. Stats/audit jobs legitimately run with posted=0
|
|
345
|
+
# while doing real work (scanning rows, checking the API); suppress the
|
|
346
|
+
# warning when any of `--checked / --scanned / --replies-refreshed` is
|
|
347
|
+
# non-zero so audit and stats rows don't trip a false positive.
|
|
348
|
+
_real_work = (args.checked or args.scanned or args.replies_refreshed
|
|
349
|
+
or args.changed or args.updated or args.views_refreshed)
|
|
350
|
+
# SCAN_ONLY runs (the Desktop-session autopilot's scan step) never post by
|
|
351
|
+
# design — an empty scan is "nothing fresh on-theme right now", not a silent
|
|
352
|
+
# failure. run-twitter-cycle.sh exports SCAN_ONLY=1 and log_run.py inherits
|
|
353
|
+
# it, so suppress the alarm for that path. The legacy full-cycle / plist
|
|
354
|
+
# autopilot runs WITHOUT SCAN_ONLY, so its behavior is byte-for-byte unchanged.
|
|
355
|
+
_scan_only = os.environ.get("SCAN_ONLY") == "1"
|
|
356
|
+
if args.posted == 0 and args.failed > 0 and not _real_work and not _scan_only:
|
|
357
|
+
warning = f"WARNING: {args.script} posted=0 failed={args.failed} -- possible silent failure"
|
|
358
|
+
with open(LOG_PATH, "a") as f:
|
|
359
|
+
f.write(f"{timestamp} | {warning}\n")
|
|
360
|
+
print(warning)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
if __name__ == "__main__":
|
|
364
|
+
main()
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Persist captured thread media on a twitter_candidates row.
|
|
3
|
+
|
|
4
|
+
Deterministic, model-free companion to the main posting cycle (2026-06-03
|
|
5
|
+
thread-media feature). The cycle pre-fetches the media of every candidate it is
|
|
6
|
+
about to draft against (twitter_browser.py thread-media-batch), then calls this
|
|
7
|
+
script once per candidate to persist the media into
|
|
8
|
+
twitter_candidates.thread_media so the reply-writer prompt can "see" the
|
|
9
|
+
image / video / GIF / link-card it is replying to, and the record survives
|
|
10
|
+
independent of the model.
|
|
11
|
+
|
|
12
|
+
Media shape: a JSON array of {url, alt, type} objects, type in
|
|
13
|
+
image|video|gif|card. An empty array [] is valid and meaningful ("captured,
|
|
14
|
+
none found", distinct from NULL = "never captured").
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
# Pass media JSON inline:
|
|
18
|
+
python3 scripts/log_thread_media.py --candidate-id 12345 \\
|
|
19
|
+
--media '[{"url":"https://pbs.twimg.com/...","alt":"Image","type":"image"}]'
|
|
20
|
+
|
|
21
|
+
# Or read the media JSON array from a file (handy for batch wiring):
|
|
22
|
+
python3 scripts/log_thread_media.py --candidate-id 12345 --media-file /tmp/m.json
|
|
23
|
+
|
|
24
|
+
Output (JSON):
|
|
25
|
+
{"logged": true, "candidate_id": 12345, "media_count": 1}
|
|
26
|
+
{"error": "CANDIDATE_NOT_FOUND", ...}
|
|
27
|
+
{"error": "BAD_MEDIA_JSON", ...}
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import argparse
|
|
31
|
+
import json
|
|
32
|
+
import os
|
|
33
|
+
import sys
|
|
34
|
+
|
|
35
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
36
|
+
from http_api import api_patch
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _load_media(args):
|
|
40
|
+
"""Return a parsed media list (or raise ValueError) from --media/--media-file."""
|
|
41
|
+
raw = None
|
|
42
|
+
if args.media_file:
|
|
43
|
+
with open(args.media_file) as f:
|
|
44
|
+
raw = f.read()
|
|
45
|
+
elif args.media is not None:
|
|
46
|
+
raw = args.media
|
|
47
|
+
else:
|
|
48
|
+
raise ValueError("one of --media or --media-file is required")
|
|
49
|
+
raw = (raw or "").strip()
|
|
50
|
+
if raw == "":
|
|
51
|
+
# Treat an empty arg as "captured, none found" -> [].
|
|
52
|
+
return []
|
|
53
|
+
parsed = json.loads(raw)
|
|
54
|
+
if not isinstance(parsed, list):
|
|
55
|
+
raise ValueError("media must be a JSON array")
|
|
56
|
+
return parsed
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def main():
    """CLI entry point: send one candidate's thread media to the API.

    Parses ``--candidate-id`` plus ``--media`` / ``--media-file``, loads the
    media list, and PATCHes it to ``/api/v1/twitter-candidates/by-id`` with
    action ``set_media``. Exactly one JSON object is printed to stdout.

    Exits with status 1 when the media input cannot be loaded, when the
    response carries ``_not_found``, or when the response has no truthy
    ``ok``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--candidate-id", type=int, required=True)
    parser.add_argument(
        "--media",
        default=None,
        help='JSON array of {url,alt,type}. Empty/"" means captured-none ([]).',
    )
    parser.add_argument(
        "--media-file",
        default=None,
        help="Path to a file containing the media JSON array (alternative to --media).",
    )
    args = parser.parse_args()
    candidate_id = args.candidate_id

    def _fail(report):
        # Emit the error object on stdout and stop with a non-zero status.
        print(json.dumps(report))
        sys.exit(1)

    try:
        media = _load_media(args)
    except Exception as exc:
        # Any load problem (missing option, unreadable file, bad JSON) is
        # reported under the same error code.
        _fail({"error": "BAD_MEDIA_JSON", "detail": str(exc)})

    response = api_patch(
        "/api/v1/twitter-candidates/by-id",
        {
            "id": candidate_id,
            "action": "set_media",
            "thread_media": media,
        },
        ok_on_conflict=True,
        ok_on_404=True,
    )
    # A falsy response is treated like an empty dict.
    result = response or {}

    if result.get("_not_found"):
        _fail({"error": "CANDIDATE_NOT_FOUND", "candidate_id": candidate_id})

    if not result.get("ok"):
        _fail({
            "error": "SET_MEDIA_FAILED",
            "candidate_id": candidate_id,
            "detail": result.get("error"),
        })

    print(json.dumps({
        "logged": True,
        "candidate_id": candidate_id,
        "media_count": len(media),
    }))
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
main()
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
log_twitter_search_attempts.py
|
|
4
|
+
|
|
5
|
+
Insert one row per (query, project, tweets_found) into twitter_search_attempts.
|
|
6
|
+
Reads a JSON array on stdin shaped like:
|
|
7
|
+
|
|
8
|
+
[
|
|
9
|
+
{"query": "...", "project": "fazm", "tweets_found": 0},
|
|
10
|
+
{"query": "...", "project": "mediar", "tweets_found": 3},
|
|
11
|
+
...
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
Used by run-twitter-cycle.sh after Phase 1 scan parses queries_used out of the
|
|
15
|
+
LLM envelope. Logging zero-result queries here is the whole point — the
|
|
16
|
+
twitter_candidates table only has rows for tweets that were actually scraped,
|
|
17
|
+
so duds were previously invisible. Pair with top_dud_twitter_queries.py.
|
|
18
|
+
|
|
19
|
+
python3 scripts/log_twitter_search_attempts.py --batch-id <id> < queries.json
|
|
20
|
+
python3 scripts/log_twitter_search_attempts.py --batch-id <id> \\
|
|
21
|
+
--attempts-out /tmp/attempts.json < queries.json
|
|
22
|
+
|
|
23
|
+
When --attempts-out is provided, writes a JSON list of
|
|
24
|
+
[{"query": ..., "project": ..., "attempt_id": <int>}, ...]
|
|
25
|
+
to that path so the downstream scorer can stamp
|
|
26
|
+
twitter_candidates.search_attempt_id and the dashboard gets exact 1:1
|
|
27
|
+
query<->post attribution. Without this, the dashboard falls back to a
|
|
28
|
+
(batch_id, project_name) fanout that credits every query in the batch —
|
|
29
|
+
including dud ones — with every posted candidate (the bug user spotted 2026-05-21).
|
|
30
|
+
|
|
31
|
+
Migrated 2026-05-18: writes now POST to /api/v1/twitter-search-attempts via
|
|
32
|
+
scripts/http_api.py instead of opening a psycopg2 connection.
|
|
33
|
+
"""
|
|
34
|
+
import argparse
|
|
35
|
+
import json
|
|
36
|
+
import os
|
|
37
|
+
import sys
|
|
38
|
+
|
|
39
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
40
|
+
from http_api import api_post # noqa: E402
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def main():
    """Log one search-attempt row per query read from stdin.

    Reads a JSON array of ``{"query", "project", "tweets_found"}`` objects
    (``search_topic`` is optional) from stdin and POSTs every entry that has
    a non-blank query to ``/api/v1/twitter-search-attempts``. Entries that
    are not objects, or whose query is blank, are skipped.

    When ``--attempts-out`` is given, a JSON list of
    ``{"query", "project", "attempt_id"}`` entries is written to that path;
    the list is empty when there was nothing to log, so the file always
    exists for the caller.

    Returns:
        0 when the batch was processed or there was nothing to log, 1 when
        stdin is not valid JSON.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--batch-id", default=None)
    p.add_argument(
        "--attempts-out",
        default=None,
        help="Optional path; if set, write JSON list of "
             "[{query, project, attempt_id}, ...] for the scorer to consume.",
    )
    # kind: which pipeline drafted these attempts. 'cycle' (default) preserves
    # back-compat for every existing caller (run-twitter-cycle.sh + friends).
    # invent_topics.py passes --kind invent so qualified_query_bank can union
    # the proven invented set into the Phase 1 bank.
    p.add_argument("--kind", default="cycle", choices=("cycle", "invent"),
                   help="Pipeline lane writing these attempts.")
    args = p.parse_args()

    def _as_count(value):
        """Coerce a tweets_found value to int; missing or malformed -> 0."""
        try:
            return int(value if value is not None else 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    def _write_attempts(entries):
        """Write ``entries`` to --attempts-out; return True if a file was written."""
        if not args.attempts_out:
            return False
        with open(args.attempts_out, "w") as f:
            json.dump(entries, f)
        return True

    raw = sys.stdin.read().strip()
    if not raw:
        print("log_twitter_search_attempts: empty stdin, nothing to log", file=sys.stderr)
        # Write empty list so the caller can still pass --attempts to the
        # scorer without a missing-file race.
        _write_attempts([])
        return 0

    try:
        rows = json.loads(raw)
    except json.JSONDecodeError as e:
        print(f"log_twitter_search_attempts: bad JSON on stdin: {e}", file=sys.stderr)
        return 1

    if not isinstance(rows, list) or not rows:
        print("log_twitter_search_attempts: not a list or empty list, nothing to log", file=sys.stderr)
        _write_attempts([])
        return 0

    inserted = 0
    attempts_map = []
    for r in rows:
        if not isinstance(r, dict):
            continue
        query = (r.get("query") or "").strip()
        if not query:
            # Nothing to log without a query; skip before touching the other
            # fields so a junk row cannot abort the batch.
            continue
        project = (r.get("project") or "").strip() or None
        tweets_found = _as_count(r.get("tweets_found"))
        # search_topic is the higher-level theme driving this query (set by
        # pick_search_topic.py at the start of the cycle). Optional, because
        # run-twitter-cycle.sh hasn't been threaded through the queries_used
        # envelope yet; score_twitter_candidates.py also backfills it from
        # twitter_candidates.search_topic on its end of the pipeline.
        search_topic = (r.get("search_topic") or "").strip() or None

        payload = {
            "query": query,
            "project_name": project,
            "tweets_found": tweets_found,
            "batch_id": args.batch_id,
            "kind": args.kind,
        }
        if search_topic:
            payload["search_topic"] = search_topic

        try:
            resp = api_post("/api/v1/twitter-search-attempts", payload)
        except SystemExit as e:
            # http_api raises SystemExit on terminal failure. Log and keep
            # going so a single bad row doesn't drop the rest of the batch.
            print(f"log_twitter_search_attempts: API error for {query!r}: {e}", file=sys.stderr)
            continue

        inserted += 1
        # Tolerate a falsy response the same way the missing data/attempt
        # keys are tolerated: no attempt id, but the row still counts.
        attempt_id = (((resp or {}).get("data") or {}).get("attempt") or {}).get("id")
        if attempt_id is not None:
            attempts_map.append({
                "query": query,
                "project": project,
                "attempt_id": int(attempt_id),
            })

    if _write_attempts(attempts_map):
        print(
            f"log_twitter_search_attempts: wrote {len(attempts_map)} attempt-id "
            f"entries to {args.attempts_out}",
            file=sys.stderr,
        )

    # Use the same tolerant coercion as the insert loop: a malformed
    # tweets_found must not crash the summary after the work is done.
    duds = sum(
        1 for r in rows
        if isinstance(r, dict) and not _as_count(r.get("tweets_found"))
    )
    print(
        f"log_twitter_search_attempts: inserted {inserted} rows ({duds} duds) "
        f"for batch={args.batch_id}",
        file=sys.stderr,
    )
    return 0
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
if __name__ == "__main__":
|
|
150
|
+
sys.exit(main())
|