@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,771 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Programmatically pick ONE search_topic per project per cycle.
|
|
3
|
+
|
|
4
|
+
Mirrors the engagement_styles.pick_style_for_post() pattern so that
|
|
5
|
+
search_topic gets the same treatment: force-picked in Python, stamped on
|
|
6
|
+
the candidate row, then propagated to posts. This replaces the legacy
|
|
7
|
+
"show Claude the entire search_topics[] array and let it improvise"
|
|
8
|
+
flow, which made end-to-end attribution noisy because the same tweet
|
|
9
|
+
could be tagged with different topics on re-discovery.
|
|
10
|
+
|
|
11
|
+
Universe: project_search_topics table via /api/v1/project-search-topics
|
|
12
|
+
(install-scoped, status='active' only). config.json is seed-only and is
|
|
13
|
+
NEVER consulted at pick time. Run scripts/seed_search_topics.py once
|
|
14
|
+
per install to mirror config.json into the DB; from then on,
|
|
15
|
+
paused/excluded topics and invented winners live in the DB and the
|
|
16
|
+
picker honors them. If the DB is unreachable or the project has zero
|
|
17
|
+
active topics, the picker raises PickerError and the cycle aborts
|
|
18
|
+
loudly — there is no config.json fallback.
|
|
19
|
+
|
|
20
|
+
Performance signal: top_search_topics.query(project, "twitter", ...)
|
|
21
|
+
which aggregates from twitter_candidates -> posts -> post_link_clicks.
|
|
22
|
+
|
|
23
|
+
Single-mode (post-2026-05-28 architectural split):
|
|
24
|
+
|
|
25
|
+
USE: weighted random sample over the FULL universe. Every topic active
|
|
26
|
+
in project_search_topics is eligible, including ones with no post
|
|
27
|
+
history. Weights are LOG-SMOOTHED so the top performer lands around
|
|
28
|
+
20-30% (vs raw proportional which would give 75-95% to one dominant
|
|
29
|
+
topic) and unscored topics get an explicit floor weight around 0.5-1%
|
|
30
|
+
per topic (low but never zero). This way every active topic always has
|
|
31
|
+
a real shot.
|
|
32
|
+
|
|
33
|
+
base(score>0) = log_e(score + 1) + 1.0
|
|
34
|
+
base(score==0) = COLD_TOPIC_WEIGHT (= 0.15)
|
|
35
|
+
|
|
36
|
+
2026-05-28: base weight is adjusted by ONE of a few mutually-exclusive
|
|
37
|
+
factors reading from twitter_candidates (posts/clicks) and
|
|
38
|
+
twitter_search_attempts (the supply join in
|
|
39
|
+
top_search_topics._query_twitter). Math lives in _compute_weight;
|
|
40
|
+
concretely:
|
|
41
|
+
|
|
42
|
+
- attempts_n == 0 → return base unchanged
|
|
43
|
+
- 0 supply across N tries → base * SUPPLY_DEAD_WEIGHT (0.3x)
|
|
44
|
+
- posted_n >= MIN_POSTS_FOR_FIT→ base * clicks-per-post fit (CTR)
|
|
45
|
+
- else, has clicks → return base (thin-supply winner)
|
|
46
|
+
- else, no clicks → base * conversion (posts/attempt)
|
|
47
|
+
|
|
48
|
+
The CTR factor (3rd branch) replaced a flat posts/attempt conversion on
|
|
49
|
+
2026-05-28: supply and clicks turned out anti-correlated on NightOwl, so
|
|
50
|
+
rewarding posting VOLUME kept high-post/low-click noise magnets in
|
|
51
|
+
rotation. Clicks-per-post demotes them without touching thin-supply click
|
|
52
|
+
winners.
|
|
53
|
+
|
|
54
|
+
Floor at base*DEAD_FLOOR_FRACTION so no topic ever locks out entirely;
|
|
55
|
+
we always keep a small retest probability in case X's firehose or our
|
|
56
|
+
criteria shift.
|
|
57
|
+
|
|
58
|
+
EXPLORE_INVENT was REMOVED 2026-05-28. Invention is now the
|
|
59
|
+
responsibility of the standalone `invent_topics.py` job (hourly, picks
|
|
60
|
+
one project per run, runs a propose-refine loop with topic-ledger
|
|
61
|
+
lookups, writes committed inventions directly to project_search_topics).
|
|
62
|
+
This picker is pure use-mode selection over the universe — no
|
|
63
|
+
in-cycle invention, no fallback branches.
|
|
64
|
+
|
|
65
|
+
When `exclude_topics` filters the universe to empty (small-project
|
|
66
|
+
mid-cycle case), the picker raises UniverseExhaustedError. Callers
|
|
67
|
+
must catch it and stop gracefully — there is no invent fallback here.
|
|
68
|
+
|
|
69
|
+
Output schema (single JSON object to stdout, one row per --project):
|
|
70
|
+
|
|
71
|
+
{
|
|
72
|
+
"mode": "use",
|
|
73
|
+
"search_topic": str,
|
|
74
|
+
"project": str,
|
|
75
|
+
"platform": "twitter",
|
|
76
|
+
"score": float, # composite_score
|
|
77
|
+
"reference_topics": [ # full pool, sorted by score DESC
|
|
78
|
+
{"search_topic", "composite_score", "posts",
|
|
79
|
+
"clicks_total", "posted_n", "skipped_n", "weight_pct"},
|
|
80
|
+
...
|
|
81
|
+
],
|
|
82
|
+
"universe_size": int,
|
|
83
|
+
"scored_n": int, # topics with composite > 0
|
|
84
|
+
"cold_n": int, # topics with composite == 0
|
|
85
|
+
"window_days": int,
|
|
86
|
+
"picked_at": ISO-8601 UTC
|
|
87
|
+
}
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
import argparse
|
|
91
|
+
import json
|
|
92
|
+
import math
|
|
93
|
+
import os
|
|
94
|
+
import random
|
|
95
|
+
import sys
|
|
96
|
+
from datetime import datetime, timezone
|
|
97
|
+
|
|
98
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class PickerError(RuntimeError):
    """Signal that no valid topic universe could be obtained from the DB.

    This is a hard stop for callers (the heredoc in run-twitter-cycle.sh
    and the CLI main): they must NOT quietly degrade to free-form topic
    picking or to reading config.json. Eligibility information (paused,
    excluded and invented topics) is recorded only in the DB, which makes
    the DB the single source of truth for what may be picked.
    """
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class UniverseExhaustedError(RuntimeError):
    """Signal that `exclude_topics` left no topic to choose from.

    This occurs mid-cycle, once the retry loop has already tried every
    active topic of the project. It is deliberately a different type from
    PickerError: callers should catch it separately and end the retry loop
    gracefully (record `universe_exhausted:1` as the cycle's failure reason
    and continue to Phase 2 with whatever candidates were accumulated).

    By design (2026-05-28) there is no invent fallback at this point:
    inventing topics belongs to the standalone `invent_topics.py` job, not
    to this in-cycle picker.
    """
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
WINDOW_DAYS = 30

# 2026-05-28: explore share reserved for freshly-invented topics.
#
# The standalone invent_topics.py job commits new source='invented' rows to
# project_search_topics. Those rows begin life with composite_score=0 and
# attempts_n=0, so they would sit at COLD_TOPIC_WEIGHT (0.15): about 0.2%
# selection probability in a 48-topic universe. At 5 picks/day per project
# that works out to ~31 days before the first sample, effectively a dead end.
#
# To avoid that, 10% of every pick is set aside to give a brand-new
# invention its first shot: with probability INVENTED_UNTRIED_EXPLORE_RATE
# the picker looks for pool rows with source='invented' AND attempts_n=0
# and chooses uniformly among them. When no such row exists it falls
# through to the normal weighted-random branch (no double dip, no separate
# "explore" mode).
#
# Why 10%: by Thompson-sampling intuition, an explore rate roughly equal to
# the expected fraction of "topics worth testing" gives invention output a
# guaranteed sampling cadence without starving the proven winners. At ~5
# picks/day per project, 10% is ~1 invented-untried sample every other day
# per project, so a new invention typically gets at least one shot within
# 24-48h of being committed by invent_topics.py.
INVENTED_UNTRIED_EXPLORE_RATE = 0.10

# Log-smoothed weighting constants; the module docstring describes the curve.
# COLD_TOPIC_WEIGHT is deliberately non-zero so that every active topic in
# the universe can be selected even with no history (the floor maps to
# ~0.5-1% in a typical 15-25 topic universe). Tune by editing the number.
COLD_TOPIC_WEIGHT = 0.15

# Number of top entries from the full pool surfaced to the prompt as
# context. More than the old trusted-top-5 is exposed because the model can
# now genuinely weigh a long tail of underperformers when inventing.
REFERENCE_TOP_N = 10

# 2026-05-27 conversion + supply gates. They close the "S4L empty-batch"
# gap: a topic with N attempts and 0 posts used to weigh the same as a
# brand-new topic (top_search_topics._query_twitter holds the upstream join
# that makes this signal visible).
#
# DEAD_FLOOR_FRACTION: fraction of its base weight that even the
# worst-performing topic keeps, so it is still retested occasionally in case
# supply recovers. Raise to retest more often, lower to lock duds out harder.
DEAD_FLOOR_FRACTION = 0.02
# SUPPLY_DEAD_WEIGHT: X returning 0 tweets across many attempts does not
# necessarily mean the topic is a bad fit; supply is dead. A mild fixed
# penalty is applied instead of the heavy conversion math, which would push
# the weight to ~0 even though the fault is partly external.
SUPPLY_DEAD_WEIGHT = 0.3
# Minimum attempts before a topic may be called supply-dead. One dry attempt
# is not evidence; 3 in a row across a 30d window is.
MIN_ATTEMPTS_FOR_SUPPLY_VERDICT = 3

# 2026-05-28 click-efficiency gate. It replaces the posts-per-attempt
# conversion for topics that have posted enough to be judged. Empirically
# (NightOwl 30d) supply and clicks are ANTI-correlated: the best-supply
# topics ("Laravel observability" 96 tweets/20 posts, "Laravel monitoring"
# 83 tweets/30 posts) earn the FEWEST clicks (1 and 4), whereas the click
# winner ("Laravel Horizon" 78 clicks) has thin/spiky supply (9 tweets,
# 1 post). The old conversion = posts/attempts REWARDED posting volume, so
# those noise-magnet topics kept full weight and kept burning comment budget
# for ~0 clicks. Click-efficiency (clicks per posted candidate) demotes them
# and leaves thin-supply click winners untouched.
#
# MIN_POSTS_FOR_FIT: posts required before clicks-per-post counts as a real
# sample. Below it a low-CTR noise magnet cannot be told apart from an
# unlucky small sample, so the CTR penalty is not applied (see the
# click-efficiency and small-sample conversion branches of _compute_weight).
MIN_POSTS_FOR_FIT = 5
# TARGET_CLICKS_PER_POST: clicks-per-post at which a topic earns full weight
# (the fit factor caps at 1.0). Topics below it are scaled down
# proportionally; topics at or above it all count as "efficient enough" and
# are ranked among themselves by base (total-click) weight. 0.5 means "at
# least 1 click per 2 posted comments". It cleanly separates NightOwl's
# performers (>=0.67 CTR) from its noise magnets (<=0.18 CTR).
TARGET_CLICKS_PER_POST = 0.5
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _compute_weight(r):
    """Return the weighted-sampling weight for a single pool row.

    `r` is a mapping read via .get() for the keys composite_score,
    posted_n, attempts_n, tweets_found_total and clicks_total; a missing
    or falsy value counts as 0.

    Step 1 - base weight:
      log(composite_score + 1) + 1 when composite_score > 0, otherwise
      COLD_TOPIC_WEIGHT. The log compresses the right-skewed composite
      distribution so a dominant topic does not absorb almost all picks.

    A row with attempts_n == 0 returns the base unchanged, so untried
    topics keep their full exploration weight. For rows with attempts,
    exactly one of the following applies, checked in this order:

      2. supply-dead backstop: attempts_n >= MIN_ATTEMPTS_FOR_SUPPLY_VERDICT
         and tweets_found_total == 0. The base is scaled by
         SUPPLY_DEAD_WEIGHT, a mild penalty because the failure is partly
         external; the topic stays available for an occasional retest.

      3. click-efficiency: posted_n >= MIN_POSTS_FOR_FIT. The base is scaled
         by clicks-per-post relative to TARGET_CLICKS_PER_POST, capped at
         1.0. This demotes topics that post a lot but earn ~0 clicks.
         Clicks also feed the composite score, so this is value (base)
         times efficiency (CTR) by design: total clicks set the ceiling
         and CTR decides how much of it the topic keeps.

      4. small sample (posted_n < MIN_POSTS_FOR_FIT): with any clicks the
         full base is returned, so thin-supply click winners are not
         punished for a low post rate. With zero clicks the base is scaled
         by the Laplace-smoothed posts-per-attempt ratio, capped at 1.0.

    Every scaled result is floored at base * DEAD_FLOOR_FRACTION, so no
    topic ever reaches zero weight and a stale dud can still be retested.
    """
    composite = float(r.get("composite_score") or 0)
    posts = int(r.get("posted_n") or 0)
    attempts = int(r.get("attempts_n") or 0)
    tweets_found = int(r.get("tweets_found_total") or 0)
    clicks = int(r.get("clicks_total") or 0)

    # Log-smoothed base for scored topics, fixed floor weight for cold ones.
    base = math.log(composite + 1.0) + 1.0 if composite > 0 else COLD_TOPIC_WEIGHT

    # Never searched: nothing to adjust.
    if attempts == 0:
        return base

    supply_dead = (
        attempts >= MIN_ATTEMPTS_FOR_SUPPLY_VERDICT and tweets_found == 0
    )
    if supply_dead:
        factor = SUPPLY_DEAD_WEIGHT
    elif posts >= MIN_POSTS_FOR_FIT:
        # posts >= MIN_POSTS_FOR_FIT here, so the division is safe while
        # that constant stays positive.
        factor = min(1.0, (clicks / posts) / TARGET_CLICKS_PER_POST)
    elif clicks > 0:
        # Thin-supply winner: too few posts to judge CTR, but it earns clicks.
        return base
    else:
        # Laplace-smoothed posts-per-attempt conversion.
        factor = min(1.0, (posts + 1.0) / (attempts + 1.0))

    return max(base * DEAD_FLOOR_FRACTION, base * factor)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _load_universe(project_name):
    """Return the project's active search topics and their sources.

    Calls http_api.api_get for
    GET /api/v1/project-search-topics?project=X&status=active. As documented
    for this endpoint, each install sees its own rows plus the legacy
    null-install bucket (same null-claim pattern as posts/replies). Only
    'active' rows are requested, so paused/excluded topics drop out without
    any local config.

    Returns a 2-tuple (topics, source_map):
      - topics: list of unique, whitespace-stripped topic strings in the
        order the API returned them.
      - source_map: dict topic -> source; the first occurrence of a topic
        wins and a missing/blank source is recorded as "seed".

    Raises PickerError when the API call fails, when the payload does not
    have the expected shape, or when no usable topic remains. There is NO
    config.json fallback by design (per 2026-05-27): a misconfigured
    install must fail loud rather than silently post against a stale seed
    list. Cold-start procedure: run scripts/seed_search_topics.py once per
    install to mirror config.json into the DB.

    Side effect: writes one grep-able summary line to stderr.
    """

    def _malformed(detail):
        # Shape errors become PickerError so callers that treat PickerError
        # as the hard stop never see a stray AttributeError/TypeError.
        return PickerError(
            f"project-search-topics API returned a malformed payload for "
            f"project={project_name!r}: {detail}"
        )

    def _text(row, key):
        # Stripped string value of row[key]; None/missing counts as "".
        value = row.get(key) or ""
        if not isinstance(value, str):
            raise _malformed(
                f"{key!r} is {type(value).__name__}, expected a string"
            )
        return value.strip()

    try:
        # Imported inside the try so an import failure is also reported as
        # PickerError.
        from http_api import api_get
        resp = api_get(
            "/api/v1/project-search-topics",
            query={"project": project_name, "status": "active"},
        )
    except Exception as e:
        raise PickerError(
            f"project-search-topics API unreachable for project="
            f"{project_name!r}: {e}"
        ) from e

    payload = resp or {}
    if not isinstance(payload, dict):
        raise _malformed(
            f"response is {type(payload).__name__}, expected an object"
        )
    data = payload.get("data") or {}
    if not isinstance(data, dict):
        raise _malformed(
            f"'data' is {type(data).__name__}, expected an object"
        )
    rows = data.get("topics") or []
    if not isinstance(rows, (list, tuple)):
        raise _malformed(
            f"'topics' is {type(rows).__name__}, expected a list"
        )

    seen = set()
    out = []
    source_map = {}  # topic -> source (first occurrence wins)
    source_counts = {"seed": 0, "invented": 0, "manual": 0}
    for r in rows:
        if not isinstance(r, dict):
            raise _malformed(
                f"topic row is {type(r).__name__}, expected an object"
            )
        t = _text(r, "topic")
        if not t or t in seen:
            continue
        seen.add(t)
        out.append(t)
        # Default a blank source to "seed" BEFORE counting, so the logged
        # seed= figure agrees with what source_map records.
        src = _text(r, "source") or "seed"
        source_map[t] = src
        if src in source_counts:
            source_counts[src] += 1

    if not out:
        raise PickerError(
            f"no active search topics for project={project_name!r} in "
            f"project_search_topics. Seed via scripts/seed_search_topics.py "
            f"or activate at least one row."
        )

    # Grep-able marker so cycle logs show the universe source explicitly.
    # active= is the count the picker actually uses; the seed/invented/manual
    # split surfaces auto-promoted topics vs the original seed pool so
    # invention activity is visible without a DB query.
    sys.stderr.write(
        f"[pick_search_topic] universe_source=db project={project_name!r} "
        f"active={len(out)} seed={source_counts['seed']} "
        f"invented={source_counts['invented']} "
        f"manual={source_counts['manual']}\n"
    )
    return out, source_map
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _load_signal(project_name, platform, window_days):
|
|
333
|
+
"""Pull per-topic performance for this project from top_search_topics.
|
|
334
|
+
|
|
335
|
+
Returns a list of dicts keyed by search_topic. Empty list on any
|
|
336
|
+
failure (no DB, no rows yet, etc.) so the picker still works in
|
|
337
|
+
pure cold-start mode.
|
|
338
|
+
"""
|
|
339
|
+
try:
|
|
340
|
+
from top_search_topics import query as _top_query
|
|
341
|
+
rows = _top_query(
|
|
342
|
+
project=project_name,
|
|
343
|
+
platform=platform,
|
|
344
|
+
window_days=window_days,
|
|
345
|
+
limit=200,
|
|
346
|
+
)
|
|
347
|
+
return rows or []
|
|
348
|
+
except Exception:
|
|
349
|
+
return []
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _build_pool(universe, signal_rows, source_map=None):
|
|
353
|
+
"""Pool = full DB universe (project_search_topics, status='active') with
|
|
354
|
+
scores attached.
|
|
355
|
+
|
|
356
|
+
Every active topic is eligible, scored or not. Unscored topics get
|
|
357
|
+
composite_score=0 and rely on COLD_TOPIC_WEIGHT to remain in play.
|
|
358
|
+
|
|
359
|
+
The pre-picker era wrote entire query strings into search_topic
|
|
360
|
+
(e.g. `("foo" OR "bar") min_faves:50 since:...`); those would pollute
|
|
361
|
+
the pool, so universe membership (not top_search_topics) is the
|
|
362
|
+
source of truth. Invented topics written by
|
|
363
|
+
scripts/invent_topics.py carry source='invented' in
|
|
364
|
+
project_search_topics and are eligible the same way seeds are. The
|
|
365
|
+
optional `source_map` is a topic -> source dict so per-row source
|
|
366
|
+
can be surfaced to the trace without a second API call.
|
|
367
|
+
|
|
368
|
+
Returns the pool sorted by composite_score DESC.
|
|
369
|
+
"""
|
|
370
|
+
signal_map = {
|
|
371
|
+
r.get("search_topic"): r
|
|
372
|
+
for r in signal_rows
|
|
373
|
+
if r.get("search_topic")
|
|
374
|
+
}
|
|
375
|
+
source_map = source_map or {}
|
|
376
|
+
|
|
377
|
+
pool = []
|
|
378
|
+
for topic in universe:
|
|
379
|
+
r = signal_map.get(topic, {})
|
|
380
|
+
score = float(r.get("composite_score") or 0)
|
|
381
|
+
pool.append({
|
|
382
|
+
"search_topic": topic,
|
|
383
|
+
"source": source_map.get(topic, "seed"),
|
|
384
|
+
"composite_score": score,
|
|
385
|
+
"posts": int(r.get("posts") or 0),
|
|
386
|
+
"clicks_total": int(r.get("clicks_total") or 0),
|
|
387
|
+
"posted_n": int(r.get("posted_n") or 0),
|
|
388
|
+
"skipped_n": int(r.get("skipped_n") or 0),
|
|
389
|
+
"attempts_n": int(r.get("attempts_n") or 0),
|
|
390
|
+
"tweets_found_total": int(r.get("tweets_found_total") or 0),
|
|
391
|
+
"zero_supply_attempts": int(r.get("zero_supply_attempts") or 0),
|
|
392
|
+
})
|
|
393
|
+
|
|
394
|
+
pool.sort(key=lambda r: (-r["composite_score"], -r["posts"]))
|
|
395
|
+
return pool
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def _ref_meta(r, weight_pct):
|
|
399
|
+
"""Strip the pool row down to the fields the prompt block surfaces.
|
|
400
|
+
|
|
401
|
+
attempts_n / tweets_found_total are surfaced so Claude can see the
|
|
402
|
+
fit-vs-supply story in the explore_invent branch ("topic X had 10
|
|
403
|
+
attempts and 50 tweets_found but zero posts → fit failure, propose a
|
|
404
|
+
different angle on the same audience").
|
|
405
|
+
"""
|
|
406
|
+
return {
|
|
407
|
+
"search_topic": r["search_topic"],
|
|
408
|
+
"composite_score": round(r["composite_score"], 2),
|
|
409
|
+
"posts": r["posts"],
|
|
410
|
+
"clicks_total": r["clicks_total"],
|
|
411
|
+
"posted_n": r["posted_n"],
|
|
412
|
+
"skipped_n": r["skipped_n"],
|
|
413
|
+
"attempts_n": r.get("attempts_n", 0),
|
|
414
|
+
"tweets_found_total": r.get("tweets_found_total", 0),
|
|
415
|
+
"zero_supply_attempts": r.get("zero_supply_attempts", 0),
|
|
416
|
+
"weight_pct": round(weight_pct, 2),
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _verdict_for_row(r):
    """Classify a pool row as "SUPPLY_DEAD", "FIT_FAIL", or None.

    Same FIT_FAIL / SUPPLY_DEAD classification used in the prompt block,
    returned as a flat string for trace-log consumers (greppable after
    the fact).

    * SUPPLY_DEAD: at least MIN_ATTEMPTS_FOR_SUPPLY_VERDICT attempts and
      zero tweets found.
    * FIT_FAIL: at least 3 attempts, tweets were found, nothing posted.

    Missing or null counters are treated as 0.
    """
    def _count(field):
        return int(r.get(field) or 0)

    attempts = _count("attempts_n")
    supply = _count("tweets_found_total")
    posted = _count("posted_n")

    if supply == 0:
        # With no supply the only possible verdict is SUPPLY_DEAD.
        if attempts >= MIN_ATTEMPTS_FOR_SUPPLY_VERDICT:
            return "SUPPLY_DEAD"
        return None
    if supply > 0 and attempts >= 3 and posted == 0:
        return "FIT_FAIL"
    return None
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _emit_trace(assignment, pool, weight_pcts, chosen_idx):
    """Log the whole pick decision as one JSON line on stderr.

    The line carries project, mode, picked topic + weight%, and the full
    pool with weights/stats/verdicts, prefixed with the grep-friendly
    tag `[pick_search_topic]` so cycle logs
    (skill/logs/twitter-cycle-*.log, which capture stderr of the bash
    pipeline) hold the full audit trail without the prompt needing to.

    Args:
        assignment: The assignment dict being returned to the caller.
        pool: Pool rows, index-aligned with ``weight_pcts``.
        weight_pcts: Per-row pick weight in percent.
        chosen_idx: Index of the picked row in ``pool``, or None.

    Failures here are swallowed so a logging hiccup never breaks the
    actual pick.
    """
    try:
        pool_entries = []
        for idx, row in enumerate(pool):
            pool_entries.append({
                "topic": row["search_topic"],
                "source": row.get("source", "seed"),
                "weight_pct": round(weight_pcts[idx], 2),
                "score": round(row["composite_score"], 2),
                "posts": row["posts"],
                "clicks": row["clicks_total"],
                "posted_n": row["posted_n"],
                "skipped_n": row["skipped_n"],
                "attempts": row.get("attempts_n", 0),
                "supply": row.get("tweets_found_total", 0),
                "verdict": _verdict_for_row(row),
                "chosen": (chosen_idx is not None and idx == chosen_idx),
            })

        # Compact time-series snapshot of every invented topic in the
        # active pool — answers "is Laravel Horizon's score growing?"
        # straight from the cycle log without a DB query. The picked
        # flag is included so post-hoc you can also answer "was an
        # invented topic ever drawn?" by grepping
        # `"invented_in_pool".*"picked":\s*true`.
        snapshot_fields = ("topic", "weight_pct", "score", "posts", "clicks", "supply")
        invented_in_pool = []
        for entry in pool_entries:
            if entry["source"] != "invented":
                continue
            snapshot = {field: entry[field] for field in snapshot_fields}
            snapshot["picked"] = entry["chosen"]
            invented_in_pool.append(snapshot)

        # (trace key, assignment key) pairs, in output order.
        header_fields = (
            ("project", "project"),
            ("platform", "platform"),
            ("mode", "mode"),
            ("picked", "search_topic"),
            ("picked_weight_pct", "picked_weight_pct"),
            ("universe_size", "universe_size"),
            ("scored_n", "scored_n"),
            ("cold_n", "cold_n"),
            ("window_days", "window_days"),
            ("picked_at", "picked_at"),
        )
        trace = {out_key: assignment.get(in_key) for out_key, in_key in header_fields}
        trace["invented_in_pool"] = invented_in_pool
        trace["pool"] = pool_entries

        sys.stderr.write(f"[pick_search_topic] {json.dumps(trace)}\n")
        sys.stderr.flush()
    except Exception:
        # Deliberately best-effort: tracing must never affect the pick.
        pass
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def pick_topic_for_project(project_name, platform="twitter",
                           window_days=WINDOW_DAYS,
                           exclude_topics=None,
                           rng=None):
    """Pick ONE search_topic for this project on this platform.

    Args:
        project_name: Project whose active topic universe is sampled.
        platform: Platform passed to the signal query (default "twitter").
        window_days: Look-back window passed to the signal query.
        exclude_topics: Optional iterable of topic strings to drop from
            the universe before sampling (case-insensitive,
            whitespace-trimmed; non-string and blank entries are
            ignored). Used by `run-twitter-cycle.sh`'s Phase 1 retry
            loop to force a fresh topic on each scan attempt so the
            model isn't pinned to one assigned topic across all retries.
        rng: Optional ``random.Random``-like object for deterministic
            picks; the module-level ``random`` is used when omitted.

    Returns:
        The assignment dict with keys ``project``, ``platform``,
        ``reference_topics``, ``universe_size``, ``scored_n``,
        ``cold_n``, ``pool_size``, ``window_days``, ``picked_at``,
        ``mode`` (always "use"), ``search_topic``, ``score``,
        ``picked_weight_pct``, plus ``explore_branch`` when the
        invented-untried explore branch made the pick.

    Raises:
        PickerError: the DB universe lookup failed or the project has
            zero active topics.
        UniverseExhaustedError: ``exclude_topics`` filtered the universe
            to empty. The shell breaks the retry loop cleanly — no
            invent fallback. Invention is the standalone
            `invent_topics.py` job's responsibility (2026-05-28
            architectural split); this picker is pure use-mode
            selection over the universe.
    """
    rnd = rng or random
    universe, source_map = _load_universe(project_name)
    picked_at = datetime.now(timezone.utc).isoformat(timespec="seconds")

    # Normalise exclusions. Blank / whitespace-only entries are dropped
    # so they cannot inflate the excluded= counts in the log lines.
    excluded_set = set()
    for t in (exclude_topics or []):
        if isinstance(t, str):
            key = t.strip().lower()
            if key:
                excluded_set.add(key)

    if excluded_set:
        filtered_universe = [
            t for t in universe
            if (t or "").strip().lower() not in excluded_set
        ]
        if not filtered_universe:
            # All topics in the universe were already tried this cycle.
            # Hard stop. The shell's retry loop catches this and exits
            # Phase 1 with whatever candidates accumulated; the cycle's
            # log_run summary surfaces `universe_exhausted:1` so the
            # dashboard distinguishes this from empty_batch / phase1_no_tweets.
            sys.stderr.write(
                f"[pick_search_topic] universe_exhausted project={project_name!r} "
                f"excluded={len(excluded_set)} active={len(universe)}\n"
            )
            raise UniverseExhaustedError(
                f"project={project_name!r} exhausted: all "
                f"{len(universe)} active topics already tried this cycle "
                f"(excluded={len(excluded_set)})"
            )
        sys.stderr.write(
            f"[pick_search_topic] excluded={len(excluded_set)} "
            f"remaining_universe={len(filtered_universe)} project={project_name!r}\n"
        )
        universe = filtered_universe

    # Loaded only after the exhaustion check so the final retry of a
    # cycle does not pay for a signal query whose result is discarded.
    signal = _load_signal(project_name, platform, window_days)

    pool = _build_pool(universe, signal, source_map=source_map)

    weights = [_compute_weight(r) for r in pool]
    weight_total = sum(weights) or 1.0
    weight_pcts = [w / weight_total * 100.0 for w in weights]

    scored_n = sum(1 for r in pool if r["composite_score"] > 0)
    cold_n = sum(1 for r in pool if r["composite_score"] <= 0)

    reference_topics = [
        _ref_meta(pool[i], weight_pcts[i])
        for i in range(min(REFERENCE_TOP_N, len(pool)))
    ]

    # 2026-05-28 dedicated explore branch for freshly-invented topics.
    # With probability INVENTED_UNTRIED_EXPLORE_RATE, if there's any
    # source='invented' + attempts_n==0 topic in the pool, pick one of
    # those uniformly. Gives invent_topics.py outputs a guaranteed
    # sampling cadence instead of languishing at the COLD_TOPIC_WEIGHT
    # floor for weeks. Falls through to the normal weighted-random
    # branch when (a) no eligible invented-untried topics exist OR
    # (b) the random roll lands outside the explore rate.
    invented_untried_idxs = [
        i for i, r in enumerate(pool)
        if r.get("source") == "invented"
        and int(r.get("attempts_n") or 0) == 0
    ]
    # Short-circuit keeps the RNG untouched when nothing is eligible,
    # so seeded runs draw the same sequence as before.
    explore = (
        bool(invented_untried_idxs)
        and rnd.random() < INVENTED_UNTRIED_EXPLORE_RATE
    )

    if explore:
        chosen_idx = rnd.choice(invented_untried_idxs)
        sys.stderr.write(
            f"[pick_search_topic] invented_untried_explore "
            f"project={project_name!r} pool_size={len(invented_untried_idxs)} "
            f"chosen={pool[chosen_idx]['search_topic']!r}\n"
        )
    else:
        # USE: weighted random over the (possibly filtered) pool. This
        # is the default path — EXPLORE_INVENT was removed 2026-05-28 in
        # favor of the standalone invent_topics.py job that writes new
        # topics directly into project_search_topics.
        needle = rnd.uniform(0.0, weight_total)
        cum = 0.0
        chosen_idx = 0  # fallback if the needle is never reached
        for i, w in enumerate(weights):
            cum += w
            if needle <= cum:
                chosen_idx = i
                break

    chosen = pool[chosen_idx]
    assignment = {
        "project": project_name,
        "platform": platform,
        "reference_topics": reference_topics,
        "universe_size": len(universe),
        "scored_n": scored_n,
        "cold_n": cold_n,
        "pool_size": len(pool),
        "window_days": window_days,
        "picked_at": picked_at,
        "mode": "use",
        "search_topic": chosen["search_topic"],
        "score": round(chosen["composite_score"], 2),
        # On the explore branch this is informational only — weights
        # were ignored for the pick. It is surfaced anyway so callers
        # see the gap between the branch decision and what the weight
        # model would have produced.
        "picked_weight_pct": round(weight_pcts[chosen_idx], 2),
    }
    if explore:
        assignment["explore_branch"] = "invented_untried"

    _emit_trace(assignment, pool, weight_pcts, chosen_idx=chosen_idx)
    return assignment
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _format_pool_table(refs):
|
|
651
|
+
"""Render the pool stats as a compact markdown table for the prompt.
|
|
652
|
+
|
|
653
|
+
Single-purpose post-2026-05-28: the picker only has one mode (use),
|
|
654
|
+
so the table is always rendered as "context for an already-assigned
|
|
655
|
+
topic". The `mode` param was removed when explore_invent was deleted.
|
|
656
|
+
"""
|
|
657
|
+
if not refs:
|
|
658
|
+
return "(no stats yet for any topic in this project)"
|
|
659
|
+
lines = []
|
|
660
|
+
header = "### Pool stats (your topic is already assigned, this is context only)"
|
|
661
|
+
lines.append(header)
|
|
662
|
+
for r in refs:
|
|
663
|
+
attempts_n = r.get("attempts_n", 0)
|
|
664
|
+
tweets_found_total = r.get("tweets_found_total", 0)
|
|
665
|
+
verdict = ""
|
|
666
|
+
if attempts_n >= MIN_ATTEMPTS_FOR_SUPPLY_VERDICT and tweets_found_total == 0:
|
|
667
|
+
verdict = " [SUPPLY_DEAD]"
|
|
668
|
+
elif attempts_n >= 3 and r["posted_n"] == 0 and tweets_found_total > 0:
|
|
669
|
+
verdict = " [FIT_FAIL]"
|
|
670
|
+
lines.append(
|
|
671
|
+
f"- **{r['search_topic']}** "
|
|
672
|
+
f"(weight {r['weight_pct']:.2f}%, "
|
|
673
|
+
f"score {r['composite_score']:.1f}, "
|
|
674
|
+
f"posts {r['posts']}, clicks {r['clicks_total']}, "
|
|
675
|
+
f"posted_n {r['posted_n']}, skipped_n {r['skipped_n']}, "
|
|
676
|
+
f"attempts {attempts_n}, supply {tweets_found_total}){verdict}"
|
|
677
|
+
)
|
|
678
|
+
lines.append(
|
|
679
|
+
" ([SUPPLY_DEAD] = ≥3 attempts and 0 tweets returned; X isn't surfacing "
|
|
680
|
+
"anything for this topic. [FIT_FAIL] = ≥3 attempts and tweets found but "
|
|
681
|
+
"0 posted; the topic surfaces noise we keep rejecting.)"
|
|
682
|
+
)
|
|
683
|
+
return "\n".join(lines)
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def get_assigned_topic_prompt(assignment):
    """Build the compact prompt block for a pick_topic_for_project() result.

    Single-mode: the picker always returns a use-mode assignment now
    (2026-05-28 explore_invent removal). Invention is owned by the
    standalone `invent_topics.py` job.

    Args:
        assignment: Assignment dict; only ``search_topic`` is read. A
            falsy value yields a placeholder string instead.

    Returns:
        The prompt text: a heading naming the topic, the drafting
        instruction, and the exact-match tagging rule, separated by
        blank lines.
    """
    if not assignment:
        return "(no search_topics defined for this project)"

    topic = assignment.get("search_topic") or ""

    # Programmatic pick is final; the model gets the topic and the
    # instruction, nothing else. The full pool with weights/verdicts is
    # emitted to the cycle log via the `[pick_search_topic]` trace line
    # in pick_topic_for_project, so any post-hoc tracing reads from the
    # log, not the prompt.
    heading = f"## Your assigned search topic: **{topic}**"
    drafting_rule = (
        "Draft ONE Twitter advanced-search query that surfaces fresh "
        "tweets about this exact topic. Do not substitute a different "
        "topic."
    )
    tagging_rule = (
        "In the JSON you emit per tweet, set `search_topic` to "
        f"exactly \"{topic}\" (string match). The scoring pipeline "
        "will reject any row whose search_topic does not equal the "
        "assigned value."
    )
    return "\n".join((heading, "", drafting_rule, "", tagging_rule))
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def main():
    """CLI entry point: pick a topic and print it as JSON or as a prompt.

    Exit codes:
        2 — PickerError (universe lookup failed / no active topics).
        3 — UniverseExhaustedError (every active topic was excluded),
            distinct so callers can branch on it.

    A malformed or non-array ``--exclude-topics`` value is ignored (the
    pick proceeds with no exclusions) but a warning is written to stderr
    so the cycle log shows why exclusions were not applied.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--project", required=True, help="Project name from config.json")
    ap.add_argument("--platform", default="twitter", help="Platform (default: twitter)")
    ap.add_argument("--window-days", type=int, default=WINDOW_DAYS)
    ap.add_argument("--seed", type=int, default=None, help="Deterministic RNG seed for tests")
    ap.add_argument("--out", default=None, help="Optional path to also write the JSON to (mirrors styles.sh pattern)")
    ap.add_argument("--prompt", action="store_true", help="Print the prompt block to stdout instead of JSON")
    ap.add_argument("--exclude-topics", default="", help="JSON array of topic strings to drop from the universe before sampling (used by Phase 1 retry loop)")
    args = ap.parse_args()

    rng = random.Random(args.seed) if args.seed is not None else None

    excluded = []
    if args.exclude_topics:
        try:
            parsed = json.loads(args.exclude_topics)
        except json.JSONDecodeError as e:
            sys.stderr.write(
                f"pick_search_topic: warning: ignoring malformed "
                f"--exclude-topics (not valid JSON: {e})\n"
            )
        else:
            if isinstance(parsed, list):
                excluded = parsed
            elif parsed is not None:
                # JSON null is accepted quietly as "no exclusions".
                sys.stderr.write(
                    f"pick_search_topic: warning: ignoring --exclude-topics "
                    f"(expected a JSON array, got {type(parsed).__name__})\n"
                )

    try:
        assignment = pick_topic_for_project(
            args.project,
            platform=args.platform,
            window_days=args.window_days,
            exclude_topics=excluded,
            rng=rng,
        )
    except UniverseExhaustedError as e:
        # CLI surface for the same exhaustion signal the shell catches.
        # Distinct exit code 3 so callers can branch on it.
        sys.stderr.write(f"pick_search_topic: {e}\n")
        sys.exit(3)
    except PickerError as e:
        sys.stderr.write(f"pick_search_topic: {e}\n")
        sys.exit(2)

    if args.out:
        with open(args.out, "w", encoding="utf-8") as f:
            json.dump(assignment, f)

    if args.prompt:
        print(get_assigned_topic_prompt(assignment))
    else:
        json.dump(assignment, sys.stdout)
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()
|