@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,2668 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Reddit posting orchestrator.
|
|
3
|
+
|
|
4
|
+
Spawns a Claude session per post that uses reddit_tools.py (search, fetch) to find
|
|
5
|
+
threads and drafts replies. Python orchestrator handles CDP posting and DB logging.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python3 scripts/post_reddit.py
|
|
9
|
+
python3 scripts/post_reddit.py --dry-run # Print prompt without executing
|
|
10
|
+
python3 scripts/post_reddit.py --limit 3 # Post at most 3 comments
|
|
11
|
+
python3 scripts/post_reddit.py --timeout 3600 # Global timeout in seconds
|
|
12
|
+
python3 scripts/post_reddit.py --project Cyrano # Override project selection
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations # PEP 604 unions (str | None) for Python 3.9 launchd
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import errno
|
|
19
|
+
import json
|
|
20
|
+
import os
|
|
21
|
+
import random
|
|
22
|
+
import re
|
|
23
|
+
import shutil
|
|
24
|
+
import subprocess
|
|
25
|
+
import sys
|
|
26
|
+
import time
|
|
27
|
+
import uuid
|
|
28
|
+
|
|
29
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
30
|
+
from http_api import api_get, api_post, api_patch
|
|
31
|
+
from author_history_block import render as _render_author_history
|
|
32
|
+
from project_topics import topics_for_project
|
|
33
|
+
|
|
34
|
+
# Root of the social-autoposter checkout under the current user's home.
REPO_DIR = os.path.expanduser("~/social-autoposter")
CONFIG_PATH = os.path.join(REPO_DIR, "config.json")

# Sibling helper scripts, addressed by absolute path under <repo>/scripts.
_SCRIPTS_DIR = os.path.join(REPO_DIR, "scripts")
REDDIT_BROWSER = os.path.join(_SCRIPTS_DIR, "reddit_browser.py")
REDDIT_BROWSER_LOCK = os.path.join(_SCRIPTS_DIR, "reddit_browser_lock.py")
REDDIT_TOOLS = os.path.join(_SCRIPTS_DIR, "reddit_tools.py")
|
|
39
|
+
|
|
40
|
+
# Interpreter that every child subprocess must be launched with.
#
# Background (incident dated 2026-06-22, same class as the Karol/Twitter bug):
# resolving the interpreter by bare name picked up the user's system python,
# which does not have the pipeline dependencies (Playwright and friends) --
# those live only in the owned uv runtime -- so on a fresh box every
# reddit_browser.py reply died.
#
# Resolution order:
#   1. S4L_PYTHON, the authoritative pin (set by the launchd plist);
#   2. sys.executable, i.e. whatever interpreter is running this script.
# A PATH lookup is never used. The chosen value is written back into the
# environment so grandchildren inherit the same pin.
_pinned_python = os.environ.get("S4L_PYTHON")
PYTHON = _pinned_python if _pinned_python else sys.executable
os.environ["S4L_PYTHON"] = PYTHON

# File holding the last observed Reddit quota state.
RATELIMIT_FILE = "/tmp/reddit_ratelimit.json"
# Seconds budgeted for waiting during preflight.
PREFLIGHT_WAIT_BUDGET_SECONDS = 180
|
|
52
|
+
|
|
53
|
+
# ---------------------------------------------------------------------------
# Tunables for the persistent reddit_candidates queue (same intent as the
# twitter_candidates queue).
#
# Since 2026-05-06 candidates live in a persistent queue rather than an
# ephemeral tmpfile. A transient post failure (CDP timeout,
# comment_box_not_found, browser crash) is retried by the next cycle's
# Phase 0 salvage, so the discover+ripen+draft spend is not written off as
# waste. A permanent failure (thread_locked at submit time, archived, deleted,
# account_blocked) is marked status='failed' and never re-evaluated.
# ---------------------------------------------------------------------------

# Hard-expire cutoff in hours. Wider than Twitter's FRESHNESS_HOURS=6 because
# Reddit threads stay actionable longer than tweets; beyond 24h a comment is
# unlikely to be seen.
FRESHNESS_HOURS = 24

# Retry budget per candidate, so a chronically broken thread (subreddit gone
# private mid-cycle, AutoMod glitch) drops out instead of recurring forever.
MAX_ATTEMPTS = 3

# Minutes to hold off before re-attempting a freshly failed candidate. Keeps
# the retry out of the same 15-min cycle so the failure reason can stabilize.
RETRY_BACKOFF_MIN = 30

# Minutes a stored draft remains reusable. A salvaged candidate whose draft is
# younger than this is posted as-is (no LLM redraft), which avoids paying
# $0.20-$0.40 of Claude cost twice for one comment when only the post step
# is being retried.
DRAFT_TTL_MIN = 60
|
|
82
|
+
|
|
83
|
+
def _env_int(name, default):
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset, blank, or not a valid
    integer, so a typo in the environment cannot crash the module at import
    time. An invalid value is reported on stderr.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError:
        print(f"[post_reddit] WARNING: ignoring non-integer {name}={raw!r}; "
              f"using default {default}", file=sys.stderr)
        return default


# Discover-phase search budget. This used to be hardcoded inline in
# build_discover_prompt as "AT MOST 2 searches"; it is now a module constant
# so each cycle can get a wider top-of-funnel and the draft-gate-omit feedback
# report can steer rephrasings without starving the next attempt of fresh
# angles. Override with the S4L_REDDIT_MAX_SEARCHES env var, no code change
# needed.
# NOTE(review): earlier commentary said this was bumped to 10 on 2026-05-08,
# but the in-code default is 3 -- confirm which value is intended.
MAX_DISCOVER_SEARCHES = _env_int("S4L_REDDIT_MAX_SEARCHES", 3)
|
|
89
|
+
|
|
90
|
+
# Classification of CDP posting errors by permanence.
#
#   permanent -> the candidate is marked status='failed' and is never
#                re-evaluated.
#   transient -> the candidate stays status='pending' with attempt_count
#                incremented; Phase 0 salvages it on the next cycle.
_PERMANENT_CDP_ERRORS = {
    "account_blocked_in_sub",
    # The post could not be verified as landed; a retry would duplicate it.
    "no_permalink",
    "thread_archived",
    "thread_locked",
    "thread_not_found",
}
_TRANSIENT_CDP_ERRORS = {
    "all_attempts_failed",
    "comment_box_not_found",
    "not_logged_in",
}
|
|
105
|
+
|
|
106
|
+
from engagement_styles import (
|
|
107
|
+
VALID_STYLES, get_styles_prompt, get_content_rules, validate_or_register,
|
|
108
|
+
pick_style_for_post, get_voice_relationship_rule,
|
|
109
|
+
)
|
|
110
|
+
# Audience-page routing: tells Claude which curated landing pages exist for the
|
|
111
|
+
# project so it can bake a deep URL (e.g. https://s4l.ai/ghostwriting) into the
|
|
112
|
+
# draft when the thread topic matches. See scripts/audience_pages.py + the
|
|
113
|
+
# landing_pages.audience_pages block in config.json.
|
|
114
|
+
from audience_pages import (
|
|
115
|
+
prompt_block as _audience_prompt_block,
|
|
116
|
+
classify_url_as_audience_page as _audience_classify_url,
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
# reddit_candidates helpers.
|
|
122
|
+
#
|
|
123
|
+
# All DB-touching helpers swallow exceptions and log to stderr. The pipeline
|
|
124
|
+
# remains functional even if the queue table is unreachable; we just lose the
|
|
125
|
+
# salvage benefit for that cycle. This matches the cautious posture of
|
|
126
|
+
# log_post / campaign_bump / log_draft elsewhere in the file.
|
|
127
|
+
|
|
128
|
+
def _subreddit_from_url(thread_url):
    """Return the lowercased subreddit name found in a Reddit URL, or None.

    Args:
        thread_url: A Reddit URL (or path) containing an ``/r/<name>`` segment.
            Falsy values (``None``, ``""``) yield ``None``.

    Returns:
        The subreddit name in lowercase, or ``None`` when the URL has no
        ``/r/<name>`` segment.
    """
    if not thread_url:
        return None
    # The name stops at the next "/", "?", "#" or whitespace -- or at the end
    # of the string. Not requiring a trailing slash means a URL that ends right
    # after the name (or continues with a query string) still resolves, and a
    # query string is never captured as part of the name.
    found = re.search(r"/r/([^/?#\s]+)", thread_url)
    return found.group(1).lower() if found else None
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _int_or_none(value):
    """Best-effort int coercion: return an int, or None if *value* is not numeric."""
    if value is None:
        return None
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Accept float-formatted counts such as "12.0".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def _db_upsert_discovered_candidate(candidate, batch_id, project_name):
    """POST a freshly discovered candidate to /api/v1/reddit-candidates.

    Per the original notes, the server-side ON CONFLICT keeps an existing
    row's status, attempt_count, post linkage and original T0 values, and only
    moves batch_id to the current cycle so the dashboard's queue counts surface
    this run (server behaviour is not visible from this file).

    Args:
        candidate: Dict describing the thread. Reads ``thread_url``,
            ``thread_author``, ``thread_title``, ``selftext`` /
            ``thread_selftext``, ``search_topic``, ``engagement_style``,
            ``score`` and ``num_comments``.
        batch_id: Identifier of the current cycle.
        project_name: Project the candidate was matched to.

    Returns:
        None. A blank ``thread_url`` is a silent no-op; any failure is logged
        to stderr and swallowed so the pipeline keeps running.
    """
    thread_url = (candidate.get("thread_url") or "").strip()
    if not thread_url:
        return
    try:
        body = {
            "thread_url": thread_url,
            "thread_author": candidate.get("thread_author"),
            "thread_title": candidate.get("thread_title"),
            "thread_selftext": candidate.get("selftext") or candidate.get("thread_selftext"),
            "subreddit": _subreddit_from_url(thread_url),
            "matched_project": project_name,
            "search_topic": candidate.get("search_topic"),
            "batch_id": batch_id,
            "draft_engagement_style": candidate.get("engagement_style"),
            # A malformed score/comment count (e.g. "1.2k") must not abort the
            # whole upsert -- send NULL for that field instead of losing the
            # candidate from the queue.
            "score_t0": _int_or_none(candidate.get("score")),
            "comments_t0": _int_or_none(candidate.get("num_comments")),
        }
        api_post("/api/v1/reddit-candidates", body)
    except Exception as e:
        print(f"[post_reddit] WARNING: upsert candidate failed for {thread_url}: {e}",
              file=sys.stderr)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _db_save_draft(thread_url, text, engagement_style):
    """Store a newly written draft on the candidate row for later reuse.

    Sends action=save_draft to /api/v1/reddit-candidates/by-thread-url with
    ok_on_404=True, so -- presumably -- a missing pending row for the URL
    (e.g. the discover-side INSERT has not landed yet) is not treated as an
    error. Does nothing when either the URL or the text is empty. Any failure
    is reported on stderr and swallowed.
    """
    if not (thread_url and text):
        return
    payload = {
        "thread_url": thread_url,
        "action": "save_draft",
        "draft_text": text,
        "draft_engagement_style": engagement_style,
    }
    try:
        api_patch("/api/v1/reddit-candidates/by-thread-url", payload, ok_on_404=True)
    except Exception as exc:
        print(f"[post_reddit] WARNING: save_draft failed for {thread_url}: {exc}",
              file=sys.stderr)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _db_load_fresh_draft(thread_url):
    """Fetch a still-fresh stored draft for *thread_url*.

    Queries /api/v1/reddit-candidates with has_fresh_draft=true and
    fresh_draft_minutes=DRAFT_TTL_MIN, leaving TTL enforcement to the server.

    Returns:
        ``(draft_text, draft_engagement_style)`` from the first returned row,
        or ``(None, None)`` when the URL is empty, no row comes back, or the
        request fails (failures are logged to stderr and swallowed).
    """
    nothing = (None, None)
    if not thread_url:
        return nothing
    try:
        response = api_get(
            "/api/v1/reddit-candidates",
            query={
                "thread_url": thread_url,
                "has_fresh_draft": "true",
                "fresh_draft_minutes": DRAFT_TTL_MIN,
                "limit": 1,
            },
        )
        envelope = response or {}
        candidates = (envelope.get("data") or {}).get("candidates") or []
        if not candidates:
            return nothing
        first = candidates[0]
        return first.get("draft_text"), first.get("draft_engagement_style")
    except Exception as exc:
        print(f"[post_reddit] WARNING: load_fresh_draft failed for {thread_url}: {exc}",
              file=sys.stderr)
    return nothing
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _db_mark_candidate_posted(thread_url, post_id):
    """Flag a candidate as posted and link it to its posts.id.

    Sends action=mark_posted to /api/v1/reddit-candidates/by-thread-url;
    ``post_id`` is included (as an int) only when it is not None. The response
    field ``data.recovery`` is then inspected purely for logging:

    * ``"marked_failed_no_post_id"`` -- the server could not find a post for
      the URL and marked the candidate failed; a warning is printed.
    * ``"ok"`` while ``post_id`` was None -- the server recovered the id on its
      own; the recovered value is printed.

    Does nothing for an empty URL. Any failure is reported on stderr and
    swallowed.
    """
    if not thread_url:
        return
    try:
        payload = {"thread_url": thread_url, "action": "mark_posted"}
        if post_id is not None:
            payload["post_id"] = int(post_id)
        response = api_patch(
            "/api/v1/reddit-candidates/by-thread-url",
            payload,
            ok_on_404=True,
        )
        result = (response or {}).get("data") or {}
        outcome = result.get("recovery")
        if outcome == "marked_failed_no_post_id":
            message = (
                f"[post_reddit] WARNING: log_post returned None and posts.thread_url "
                f"lookup failed for {thread_url}. Marked status='failed' to prevent "
                f"Phase 0 re-post (would dupe). Comment is live on Reddit; backfill "
                f"required for click attribution."
            )
            print(message, file=sys.stderr)
        elif outcome == "ok" and post_id is None:
            # The server found the post id itself. Keep emitting the same
            # WARNING line as before so downstream log ingestion is unchanged.
            recovered = (result.get("candidate") or {}).get("post_id")
            message = (
                f"[post_reddit] WARNING: recovered post_id={recovered} via posts.thread_url "
                f"after log_post returned None for {thread_url}"
            )
            print(message, file=sys.stderr)
    except Exception as exc:
        print(f"[post_reddit] WARNING: mark_posted failed for {thread_url}: {exc}",
              file=sys.stderr)
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _db_mark_candidate_attempt(thread_url, reason, permanent=False):
    """Report a failed post attempt for *thread_url* to the candidates API.

    Sends action=mark_attempt to /api/v1/reddit-candidates/by-thread-url along
    with the failure ``reason``, the ``permanent`` flag coerced to bool, and
    MAX_ATTEMPTS. The permanent-vs-transient branching happens on the server.
    Does nothing for an empty URL. Any failure is reported on stderr and
    swallowed.
    """
    if not thread_url:
        return
    try:
        payload = {
            "thread_url": thread_url,
            "action": "mark_attempt",
            "reason": reason,
            "permanent": bool(permanent),
            "max_attempts": MAX_ATTEMPTS,
        }
        api_patch("/api/v1/reddit-candidates/by-thread-url", payload, ok_on_404=True)
    except Exception as exc:
        print(f"[post_reddit] WARNING: mark_attempt failed for {thread_url}: {exc}",
              file=sys.stderr)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _db_phase0_salvage(batch_id, freshness_hours=FRESHNESS_HOURS,
                       max_attempts=MAX_ATTEMPTS,
                       retry_backoff_min=RETRY_BACKOFF_MIN):
    """Run Phase 0 (expire + salvage) through the candidates API.

    POSTs the three windows, each coerced to int, to
    /api/v1/reddit-candidates/phase0-salvage.

    Returns:
        ``(expired_count, salvaged_count)`` as ints taken from the response's
        ``data`` object, or ``(0, 0)`` if the call fails (the failure is
        logged to stderr and swallowed).
    """
    try:
        request = {
            "batch_id": batch_id,
            "freshness_hours": int(freshness_hours),
            "max_attempts": int(max_attempts),
            "retry_backoff_minutes": int(retry_backoff_min),
        }
        response = api_post("/api/v1/reddit-candidates/phase0-salvage", request)
        counts = (response or {}).get("data") or {}
        expired = int(counts.get("expired_count") or 0)
        salvaged = int(counts.get("salvaged_count") or 0)
        return expired, salvaged
    except Exception as exc:
        print(f"[post_reddit] WARNING: phase0 salvage failed: {exc}",
              file=sys.stderr)
        return 0, 0
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _db_pick_salvage_candidates(batch_id, limit=1):
    """Claim up to *limit* salvage-eligible candidates from one project.

    POSTs to /api/v1/reddit-candidates/pick-salvage with MAX_ATTEMPTS and
    DRAFT_TTL_MIN. Per the original notes, the route picks a project and
    atomically claims rows in a single transaction so concurrent post phases
    cannot pick the same row (server behaviour is not visible from this file).

    Args:
        batch_id: Identifier of the current cycle.
        limit: Maximum rows to claim; falsy or values below 1 are raised to 1.

    Returns:
        A dict with keys ``project_name``, ``decisions``, ``cost``,
        ``salvaged``, ``salvaged_attempt`` and ``salvaged_count``, or ``None``
        when the response carries no decisions or the call fails (failures are
        logged to stderr and swallowed).
    """
    limit = int(limit or 1)
    if limit < 1:
        limit = 1
    try:
        request = {
            "batch_id": batch_id,
            "max_attempts": MAX_ATTEMPTS,
            "draft_ttl_minutes": DRAFT_TTL_MIN,
            "limit": limit,
        }
        response = api_post("/api/v1/reddit-candidates/pick-salvage", request)
        picked = (response or {}).get("data") or {}
        decisions = picked.get("decisions")
        if not decisions:
            return None
        return {
            "project_name": picked.get("project_name") or "general",
            "decisions": decisions,
            "cost": float(picked.get("cost") or 0.0),
            "salvaged": bool(picked.get("salvaged", True)),
            "salvaged_attempt": int(picked.get("salvaged_attempt") or 0),
            "salvaged_count": int(picked.get("salvaged_count") or 0),
        }
    except Exception as exc:
        print(f"[post_reddit] WARNING: pick_salvage_candidates failed: {exc}",
              file=sys.stderr)
        return None
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _db_pick_salvage_candidate(batch_id):
    """Back-compat alias for the singular name.

    Older callers (and tests) may still use this spelling. It delegates to
    ``_db_pick_salvage_candidates`` with ``limit=1`` so there is only one
    code path to maintain.
    """
    return _db_pick_salvage_candidates(batch_id, limit=1)
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _apply_rate_limit_policy(remaining, reset_seconds, source, budget_seconds):
|
|
365
|
+
"""Given current quota, decide: proceed (True), wait then proceed, or skip (False)."""
|
|
366
|
+
if remaining > 2 or reset_seconds <= 0:
|
|
367
|
+
return True
|
|
368
|
+
if reset_seconds > budget_seconds:
|
|
369
|
+
print(f"[post_reddit] Reddit rate-limited ({source}), reset in "
|
|
370
|
+
f"{int(reset_seconds)}s (> {budget_seconds}s budget). Skipping run.")
|
|
371
|
+
return False
|
|
372
|
+
wait = int(reset_seconds) + 3
|
|
373
|
+
print(f"[post_reddit] Reddit rate-limited ({source}), waiting {wait}s "
|
|
374
|
+
f"for reset before spawning Claude...")
|
|
375
|
+
time.sleep(wait)
|
|
376
|
+
return True
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _probe_reddit_quota():
    """Make one cheap request to Reddit to learn the live quota.

    Writes the observed state to RATELIMIT_FILE as
    {"remaining": ..., "reset_at": <epoch seconds>} so later readers of that
    file see fresh data.

    Returns (remaining, reset_seconds). An HTTP 429 is reported as
    (0.0, reset). Returns None for any other HTTP error, a network failure,
    or unparsable headers on a successful response.

    This function never raises: a failure to write the state file is logged
    to stderr and the live values are still returned.
    """
    import urllib.request
    import urllib.error
    url = "https://old.reddit.com/r/popular.json?limit=1"
    req = urllib.request.Request(
        url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"}
    )
    try:
        # The context manager closes the response once headers are read.
        with urllib.request.urlopen(req, timeout=10) as resp:
            remaining = float(resp.headers.get("X-Ratelimit-Remaining", 100))
            reset = float(resp.headers.get("X-Ratelimit-Reset", 0))
        stored_remaining = remaining
    except urllib.error.HTTPError as e:
        if e.code != 429:
            return None
        # Errors raised inside this handler are NOT covered by the sibling
        # `except Exception` below, so header parsing is guarded on its own.
        try:
            reset = float(e.headers.get("X-Ratelimit-Reset", 60))
        except Exception:
            reset = 60.0
        remaining = 0.0
        stored_remaining = 0
    except Exception:
        return None

    try:
        with open(RATELIMIT_FILE, "w") as f:
            json.dump({"remaining": stored_remaining, "reset_at": time.time() + reset}, f)
    except Exception as e:
        print(f"[post_reddit] WARNING: could not update rate-limit state file: {e}",
              file=sys.stderr)
    return remaining, reset
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def preflight_rate_limit(budget_seconds=PREFLIGHT_WAIT_BUDGET_SECONDS):
    """Block or bail before spawning Claude if Reddit search is throttled.

    Strategy:
    1. Probe Reddit once for the live rate-limit headers. This catches the
       case where the shared state file is stale but the server still
       throttles us.
    2. If the probe fails, fall back to the cached state in RATELIMIT_FILE.

    Returns True when the run may proceed (possibly after sleeping) and
    False when the reset is further away than `budget_seconds`. A missing,
    unreadable or malformed state file fails open (returns True).
    """
    probe = _probe_reddit_quota()
    if probe is not None:
        remaining, reset = probe
        print(f"[post_reddit] Reddit quota probe: remaining={remaining:.0f} "
              f"reset_in={int(reset)}s")
        return _apply_rate_limit_policy(remaining, reset, "probe", budget_seconds)
    try:
        with open(RATELIMIT_FILE) as f:
            rl = json.load(f)
        # Coerce inside the try so a non-dict payload or null / non-numeric
        # fields are treated like an unreadable file instead of crashing.
        remaining = float(rl.get("remaining", 100))
        wait = int(float(rl.get("reset_at", 0)) - time.time())
    except Exception:
        return True
    return _apply_rate_limit_policy(remaining, wait, "cached", budget_seconds)
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
# ---------------------------------------------------------------------------
|
|
438
|
+
# subreddit_bans audit shape (introduced 2026-05-11)
|
|
439
|
+
# ---------------------------------------------------------------------------
|
|
440
|
+
# Each entry in subreddit_bans.comment_blocked / .thread_blocked is now an
|
|
441
|
+
# object with the audit metadata we wished we'd been recording all along:
|
|
442
|
+
# {"sub": "powerbi", "added_at": "2026-05-11T00:31:49Z",
|
|
443
|
+
# "reason": "account_blocked_in_sub", "project": "WhatsApp MCP"}
|
|
444
|
+
#
|
|
445
|
+
# Pre-migration entries are bare strings; the readers/writers handle both
|
|
446
|
+
# shapes transparently. The migration script
|
|
447
|
+
# (scripts/migrate_subreddit_bans_to_objects.py) backfills existing strings to
|
|
448
|
+
# objects with null metadata.
|
|
449
|
+
#
|
|
450
|
+
# _ban_entry_sub(entry): extract the sub slug from either shape (returns
|
|
451
|
+
# lowercase string or None).
|
|
452
|
+
# _ban_entries_to_subs(L): set of lowercase sub slugs in a ban list.
|
|
453
|
+
# _make_ban_entry(...): build a fresh entry with current UTC timestamp.
|
|
454
|
+
|
|
455
|
+
def _ban_entry_sub(entry) -> str | None:
|
|
456
|
+
"""Return the lowercased sub slug from a ban-list entry (str or dict)."""
|
|
457
|
+
if isinstance(entry, str):
|
|
458
|
+
s = entry.strip().lower()
|
|
459
|
+
return s or None
|
|
460
|
+
if isinstance(entry, dict):
|
|
461
|
+
s = (entry.get("sub") or "").strip().lower()
|
|
462
|
+
return s or None
|
|
463
|
+
return None
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
def _ban_entries_to_subs(entries) -> set[str]:
    """Collect the distinct lowercase sub slugs found in a ban list.

    `entries` may be None or empty. Entries for which `_ban_entry_sub`
    yields no slug are skipped.
    """
    slugs = map(_ban_entry_sub, entries or [])
    return set(filter(None, slugs))
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _make_ban_entry(sub: str, reason: str | None, project: str | None) -> dict:
|
|
476
|
+
"""Build a new ban-list entry with the current UTC timestamp.
|
|
477
|
+
|
|
478
|
+
Stamps the current Reddit account (top-level config.json reddit_account
|
|
479
|
+
.username) so per-account scoping in reddit_tools._load_comment_blocked_subs
|
|
480
|
+
can ignore this entry on other machines posting as a different account.
|
|
481
|
+
Returns account=None if the config has no reddit_account, in which case
|
|
482
|
+
the reader treats the entry as global (back-compat with pre-2026-05-15).
|
|
483
|
+
|
|
484
|
+
Project scope (2026-05-19 cleanup): subreddit_bans.comment_blocked entries
|
|
485
|
+
are ALWAYS account-level by definition: if a sub silently strips the
|
|
486
|
+
comment form (or other account-triggered automod gate) for our account,
|
|
487
|
+
that gate applies regardless of which project's pipeline noticed it.
|
|
488
|
+
Project-specific relevance rejects live in `project_search_excludes`,
|
|
489
|
+
NOT here. So we drop the `project` field semantically (kept as audit
|
|
490
|
+
breadcrumb `noticed_by_project` for forensics, but the reader ignores
|
|
491
|
+
it). Account is the only scope dimension.
|
|
492
|
+
"""
|
|
493
|
+
from datetime import datetime, timezone
|
|
494
|
+
account = None
|
|
495
|
+
try:
|
|
496
|
+
with open(CONFIG_PATH) as _f:
|
|
497
|
+
account = (json.load(_f).get("reddit_account") or {}).get("username") or None
|
|
498
|
+
except Exception:
|
|
499
|
+
pass
|
|
500
|
+
return {
|
|
501
|
+
"sub": sub.strip().lower(),
|
|
502
|
+
"added_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
|
503
|
+
"reason": reason or None,
|
|
504
|
+
# Kept for audit (who first hit this); reader ignores. Use `account`
|
|
505
|
+
# for actual scoping.
|
|
506
|
+
"noticed_by_project": project or None,
|
|
507
|
+
"account": account,
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def mark_comment_blocked(thread_url: str,
|
|
512
|
+
reason: str | None = "account_blocked_in_sub",
|
|
513
|
+
project: str | None = None) -> None:
|
|
514
|
+
"""Add a subreddit to config.json subreddit_bans.comment_blocked at runtime.
|
|
515
|
+
|
|
516
|
+
Called when the bot's comment attempt is rejected (no comment form, locked,
|
|
517
|
+
restricted). The sub gets blocked for future comment attempts so the
|
|
518
|
+
drafter never targets it again. Thread-posting eligibility is tracked
|
|
519
|
+
separately in subreddit_bans.thread_blocked.
|
|
520
|
+
|
|
521
|
+
Records audit metadata (added_at / reason / project) on the entry.
|
|
522
|
+
"""
|
|
523
|
+
sub_match = re.search(r'/r/([^/]+)/', thread_url)
|
|
524
|
+
if not sub_match:
|
|
525
|
+
return
|
|
526
|
+
sub = sub_match.group(1).lower()
|
|
527
|
+
try:
|
|
528
|
+
with open(CONFIG_PATH) as f:
|
|
529
|
+
config = json.load(f)
|
|
530
|
+
bans = config.setdefault("subreddit_bans", {})
|
|
531
|
+
blocked = bans.setdefault("comment_blocked", [])
|
|
532
|
+
existing = _ban_entries_to_subs(blocked)
|
|
533
|
+
if sub not in existing:
|
|
534
|
+
blocked.append(_make_ban_entry(sub, reason, project))
|
|
535
|
+
blocked.sort(key=lambda e: _ban_entry_sub(e) or "")
|
|
536
|
+
with open(CONFIG_PATH, "w") as f:
|
|
537
|
+
json.dump(config, f, indent=2)
|
|
538
|
+
f.write("\n")
|
|
539
|
+
print(f"[post_reddit] Added r/{sub} to subreddit_bans.comment_blocked "
|
|
540
|
+
f"(reason={reason!r} project={project!r})")
|
|
541
|
+
except Exception as e:
|
|
542
|
+
print(f"[post_reddit] WARNING: could not persist blocked sub r/{sub}: {e}")
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
# Keywords that indicate a permanent account/subreddit block rather than a
|
|
546
|
+
# transient failure. Case-insensitive match against Claude's abort_reason.
|
|
547
|
+
# Tuned 2026-04-29: broaden to catch mod-rule bans expressed in present tense
|
|
548
|
+
# ("the sub bans software", "no software allowed") in addition to account-level
|
|
549
|
+
# bans ("u/X has been banned"). Each new pattern observed from real abort logs.
|
|
550
|
+
_THREAD_BLOCK_PATTERNS = [
|
|
551
|
+
r"\bbanned\b",
|
|
552
|
+
r"\bbans\b\s+(all|any|every|every kind|posts?|comments?|software|websites?|self[- ]promo|advertising|promotional)",
|
|
553
|
+
r"\bban\b.*\b(software|posts?|websites?|self[- ]promo|advertising)\b",
|
|
554
|
+
r"access was denied",
|
|
555
|
+
r"\b403\b",
|
|
556
|
+
r"link[- ]only",
|
|
557
|
+
r"text posts? (are )?disabled",
|
|
558
|
+
r"text (tab|option) (is )?disabled",
|
|
559
|
+
r"does not allow text",
|
|
560
|
+
r"not allowed to post",
|
|
561
|
+
r"posting.*restricted",
|
|
562
|
+
r"no (software|self[- ]promo|promotional|advertising|ads)",
|
|
563
|
+
r"\bprohibit(ed|s)?\b",
|
|
564
|
+
r"\bremoved\b.*\b(rule|mod)\b", # "would be removed per rule X"
|
|
565
|
+
r"would (get )?removed",
|
|
566
|
+
r"\bnot permitted\b",
|
|
567
|
+
r"approved (submitter|user)s? only",
|
|
568
|
+
r"forbidden",
|
|
569
|
+
]
|
|
570
|
+
|
|
571
|
+
def _abort_is_permanent_block(abort_reason: str) -> bool:
    """Report whether `abort_reason` matches any permanent-block pattern.

    The reason is lowercased first and then searched against every entry of
    _THREAD_BLOCK_PATTERNS; the first hit is enough.
    """
    haystack = abort_reason.lower()
    return any(re.search(pattern, haystack) for pattern in _THREAD_BLOCK_PATTERNS)
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def mark_thread_blocked(subreddit: str, abort_reason: str = "",
|
|
581
|
+
project: str | None = None,
|
|
582
|
+
force: bool = False) -> None:
|
|
583
|
+
"""Add a subreddit to config.json subreddit_bans.thread_blocked at runtime.
|
|
584
|
+
|
|
585
|
+
Called when a thread-post attempt is permanently blocked (account banned,
|
|
586
|
+
link-only sub, text posts disabled, 403). The sub is skipped by
|
|
587
|
+
pick_thread_target.py on all future runs. Comment eligibility is tracked
|
|
588
|
+
separately in subreddit_bans.comment_blocked.
|
|
589
|
+
|
|
590
|
+
subreddit may be bare ('programming') or prefixed ('r/programming').
|
|
591
|
+
|
|
592
|
+
Records audit metadata (added_at / reason / project) on the entry.
|
|
593
|
+
The reason field captures the abort_reason verbatim (truncated to 280
|
|
594
|
+
chars) so we can audit why the sub got blocked months later.
|
|
595
|
+
|
|
596
|
+
force=True bypasses the abort_reason regex gate (used when an upstream
|
|
597
|
+
signal — e.g. the model's permanent_block=true — has already decided
|
|
598
|
+
this is permanent and the reason text alone wouldn't match the patterns).
|
|
599
|
+
"""
|
|
600
|
+
sub = re.sub(r"^r/", "", subreddit, flags=re.IGNORECASE).strip().lower()
|
|
601
|
+
if not sub:
|
|
602
|
+
return
|
|
603
|
+
if not force and abort_reason and not _abort_is_permanent_block(abort_reason):
|
|
604
|
+
return
|
|
605
|
+
reason_str: str | None = (abort_reason or "").strip()[:280] or None
|
|
606
|
+
try:
|
|
607
|
+
with open(CONFIG_PATH) as f:
|
|
608
|
+
config = json.load(f)
|
|
609
|
+
bans = config.setdefault("subreddit_bans", {})
|
|
610
|
+
blocked = bans.setdefault("thread_blocked", [])
|
|
611
|
+
existing = _ban_entries_to_subs(blocked)
|
|
612
|
+
if sub not in existing:
|
|
613
|
+
blocked.append(_make_ban_entry(sub, reason_str, project))
|
|
614
|
+
blocked.sort(key=lambda e: _ban_entry_sub(e) or "")
|
|
615
|
+
with open(CONFIG_PATH, "w") as f:
|
|
616
|
+
json.dump(config, f, indent=2)
|
|
617
|
+
f.write("\n")
|
|
618
|
+
print(f"[post_reddit] Auto-blocked r/{sub} from future thread posts "
|
|
619
|
+
f"(reason={reason_str!r} project={project!r})")
|
|
620
|
+
else:
|
|
621
|
+
print(f"[post_reddit] r/{sub} already in thread_blocked, skipping")
|
|
622
|
+
except Exception as e:
|
|
623
|
+
print(f"[post_reddit] WARNING: could not persist thread-blocked sub r/{sub}: {e}")
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
def load_config():
    """Parse and return the JSON document stored at CONFIG_PATH."""
    with open(CONFIG_PATH) as config_file:
        parsed = json.load(config_file)
    return parsed
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def pick_project(platform="reddit", exclude=None):
    """Run scripts/pick_project.py and return its parsed JSON result.

    Args:
        platform: value passed as --platform.
        exclude: optional iterable of names to skip, passed comma-joined as
            --exclude. A single string is treated as one name rather than
            being split into characters.

    Returns the decoded JSON on success, otherwise None. Failures (non-zero
    exit, timeout, invalid JSON, spawn errors) are reported on stderr rather
    than being swallowed silently, and never raise.
    """
    try:
        cmd = [PYTHON, os.path.join(REPO_DIR, "scripts", "pick_project.py"),
               "--platform", platform, "--json"]
        if exclude:
            names = [exclude] if isinstance(exclude, str) else list(exclude)
            cmd.extend(["--exclude", ",".join(names)])
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
        output = result.stdout.strip()
        if result.returncode == 0 and output:
            return json.loads(output)
        if result.returncode != 0:
            print(f"[post_reddit] WARNING: pick_project.py exited with "
                  f"{result.returncode}: {(result.stderr or '').strip()[-200:]}",
                  file=sys.stderr)
    except Exception as e:
        print(f"[post_reddit] WARNING: pick_project failed: {e}", file=sys.stderr)
    return None
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def get_top_performers(project_name, platform="reddit", style=None):
    """Return the stdout of scripts/top_performers.py for this project.

    `style`, when truthy, is forwarded as --style; the original notes
    (2026-05-19) say this restricts the per-style exemplars to the style
    assigned by pick_style_for_post(). Without it the script is run with no
    --style flag (legacy multi-style report).

    Returns "" when the script exits non-zero or anything goes wrong.
    """
    try:
        script = os.path.join(REPO_DIR, "scripts", "top_performers.py")
        argv = [PYTHON, script, "--platform", platform, "--project", project_name]
        if style:
            argv += ["--style", style]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode != 0:
            return ""
        return proc.stdout.strip()
    except Exception:
        return ""
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def get_top_search_topics(project_name, platform="reddit", limit=8, window_days=30):
    """Return the stdout of scripts/top_search_topics.py as a text block.

    The script is asked for the best-performing search_topic seeds of this
    project/platform within `window_days`, capped at `limit`. Returns ""
    when the script exits non-zero or anything goes wrong.
    """
    try:
        argv = [
            PYTHON, os.path.join(REPO_DIR, "scripts", "top_search_topics.py"),
            "--project", project_name, "--platform", platform,
            "--window-days", str(window_days), "--limit", str(limit),
        ]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode != 0:
            return ""
        return proc.stdout.strip()
    except Exception:
        return ""
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
def get_omitted_reddit_topics(project_name, limit=10, window_hours=168, min_omits=2):
    """Return the stdout of scripts/top_omitted_reddit_topics.py.

    Per the original notes, the output is a JSON list (as a string) of
    search_topic seeds whose threads pass the ripen gate but are repeatedly
    OMITTED by the draft-time selection gate, i.e. category-level mismatches
    the LLM should drop or rephrase. `min_omits` filters out one-off omits.

    Returns "" when the script exits non-zero or anything goes wrong.
    """
    try:
        argv = [
            PYTHON, os.path.join(REPO_DIR, "scripts", "top_omitted_reddit_topics.py"),
            "--project", project_name,
            "--window-hours", str(window_hours),
            "--limit", str(limit),
            "--min-omits", str(min_omits),
        ]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode != 0:
            return ""
        return proc.stdout.strip()
    except Exception:
        return ""
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def get_dud_reddit_queries(project_name, limit=15, window_hours=168):
    """Return the stdout of scripts/top_dud_reddit_queries.py.

    Per the original notes, the output is a JSON list (as a string) of recent
    dud Reddit queries for the project, used as an anti-list in the scanner
    prompt. The default window is 7 days (168 hours).

    Returns "" when the script exits non-zero or anything goes wrong.
    """
    try:
        argv = [
            PYTHON, os.path.join(REPO_DIR, "scripts", "top_dud_reddit_queries.py"),
            "--project", project_name,
            "--window-hours", str(window_hours),
            "--limit", str(limit),
        ]
        proc = subprocess.run(argv, capture_output=True, text=True, timeout=15)
        if proc.returncode != 0:
            return ""
        return proc.stdout.strip()
    except Exception:
        return ""
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
def _recent_comment_text(item):
|
|
737
|
+
"""Accept either str (legacy shape) or (id, content) tuple (2026-05-12
|
|
738
|
+
shape) and return the content string. Lets all three prompt builders
|
|
739
|
+
consume recent_comments without caring which shape they got. If
|
|
740
|
+
you're refactoring the upstream shape again, update this one place."""
|
|
741
|
+
if isinstance(item, (list, tuple)) and len(item) >= 2:
|
|
742
|
+
return item[1] or ""
|
|
743
|
+
return item or ""
|
|
744
|
+
|
|
745
|
+
|
|
746
|
+
def _strip_active_suffixes(text, active_campaigns):
|
|
747
|
+
"""Remove any active-campaign suffix from `text` (idempotent, trailing-only).
|
|
748
|
+
|
|
749
|
+
Mirrors engage_reddit.strip_active_suffixes (commit 8cdde18) so we have
|
|
750
|
+
the same protection for the post_reddit drafting path. Without this,
|
|
751
|
+
`get_recent_comments()` feeds the LLM prior `posts.our_content` rows
|
|
752
|
+
that already end in the campaign suffix (e.g. " written with s4lai"),
|
|
753
|
+
the LLM copies the literal suffix into its draft because it looks like
|
|
754
|
+
part of our voice, and the tool-level append at line ~2092 stacks a
|
|
755
|
+
SECOND suffix on top. Observed in production 2026-05-18 on Deep_Ad1959
|
|
756
|
+
(reply rows 70412 + 70413) via engage_reddit; same risk exists here.
|
|
757
|
+
|
|
758
|
+
Strips trailing suffix repeatedly so a historically-doubled row also
|
|
759
|
+
collapses to clean text. Active campaign list is passed in by the
|
|
760
|
+
caller so we only strip patterns we're actively using (avoids
|
|
761
|
+
unbounded false-positive matches on incidental phrasing).
|
|
762
|
+
"""
|
|
763
|
+
if not text or not active_campaigns:
|
|
764
|
+
return text
|
|
765
|
+
cleaned = text.rstrip()
|
|
766
|
+
changed = True
|
|
767
|
+
while changed:
|
|
768
|
+
changed = False
|
|
769
|
+
for camp in active_campaigns:
|
|
770
|
+
suffix = (camp.get("suffix") or "").strip()
|
|
771
|
+
if suffix and cleaned.endswith(suffix):
|
|
772
|
+
cleaned = cleaned[: -len(suffix)].rstrip()
|
|
773
|
+
changed = True
|
|
774
|
+
return cleaned
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
def get_recent_comments(limit=5):
    """Fetch recent Reddit `our_content` rows via GET /api/v1/posts.

    Returns a list of (id, content) tuples. Rows without content or without
    an id are skipped. Per the original notes, the ids feed the
    generation_trace audit blob and the content feeds the prompt builders.

    Active-campaign suffixes are stripped from each content string before
    returning, so the LLM never sees a suffixed exemplar it could copy (which
    would then get a second tool-level suffix). Rows that become empty after
    stripping are dropped. If the campaign lookup fails, the unstripped rows
    are returned and a warning goes to stderr.
    """
    envelope = api_get(
        "/api/v1/posts",
        query={"platform": "reddit", "limit": int(limit)},
    )
    posts = ((envelope or {}).get("data") or {}).get("posts") or []

    pairs = []
    for row in posts:
        body = row.get("our_content")
        if body and row.get("id") is not None:
            pairs.append((int(row["id"]), body))

    # A degraded campaigns API must not take down the discover/draft
    # pipeline, so fall back to the raw rows on any failure.
    try:
        campaigns = load_active_reddit_campaigns()
    except Exception as e:
        print(f"[post_reddit] WARNING: load_active_reddit_campaigns failed "
              f"during recent_comments sanitize ({e}); returning raw content",
              file=sys.stderr)
        return pairs

    return [
        (post_id, trimmed)
        for post_id, body in pairs
        if (trimmed := _strip_active_suffixes(body, campaigns))
    ]
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
def load_active_reddit_campaigns():
    """List active Reddit campaigns that carry a literal suffix.

    Queries GET /api/v1/campaigns with status=active, platform=reddit,
    has_suffix=true, with_budget_remaining=true and limit=500.

    Returns a list of {"id": int, "suffix": ..., "sample_rate": float}
    dicts; a missing sample_rate defaults to 1.0. Per the original notes,
    the suffix is appended in Python before posting (the LLM never sees it)
    and sample_rate gates a per-post coin flip for A/B runs.
    """
    envelope = api_get(
        "/api/v1/campaigns",
        query={
            "status": "active",
            "platform": "reddit",
            "has_suffix": "true",
            "with_budget_remaining": "true",
            "limit": 500,
        },
    )
    campaigns = []
    for row in ((envelope or {}).get("data") or {}).get("campaigns") or []:
        campaign_id = int(row["id"])
        rate = row.get("sample_rate")
        campaigns.append({
            "id": campaign_id,
            "suffix": row.get("suffix"),
            "sample_rate": float(1.0 if rate is None else rate),
        })
    return campaigns
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
def _angle_str(v):
|
|
853
|
+
if isinstance(v, str):
|
|
854
|
+
return v.strip()
|
|
855
|
+
if isinstance(v, dict):
|
|
856
|
+
return "; ".join(f"{k}: {_angle_str(x)}" for k, x in v.items() if x)
|
|
857
|
+
if isinstance(v, (list, tuple)):
|
|
858
|
+
return ", ".join(_angle_str(x) for x in v if x)
|
|
859
|
+
return str(v) if v else ""
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
def build_content_angle(project, config):
    """Build the content-angle text, preferring project-specific positioning.

    If the project defines `content_angle`, that is the base. Otherwise the
    base is assembled from the project's description / differentiator / icp /
    setup, its messaging (lead_with_pain, solution, proof) and its voice
    (tone, never, up to three examples). The global `config["content_angle"]`
    is used only when none of those produced any text.

    The project's audience-pages block, when available, is appended after a
    blank line; a failure to build that block is ignored.

    `voice.never` and `voice.examples` may each be a list or a single string.
    A bare string is treated as one item rather than being joined character
    by character, and non-string list items are stringified.
    """
    def _as_items(value):
        # Normalise a str-or-sequence config value to a list of strings.
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return [str(x) for x in value if x]
        return [str(value)]

    if project.get("content_angle"):
        base = project["content_angle"]
    else:
        parts = []
        for key in ("description", "differentiator", "icp", "setup"):
            s = _angle_str(project.get(key))
            if s:
                parts.append(s)

        messaging = project.get("messaging", {}) or {}
        for key in ("lead_with_pain", "solution", "proof"):
            s = _angle_str(messaging.get(key))
            if s:
                parts.append(s)

        voice = project.get("voice", {}) or {}
        if voice.get("tone"):
            parts.append(f"Voice: {voice['tone']}")
        never = _as_items(voice.get("never"))
        if never:
            parts.append("Never: " + "; ".join(never))
        examples = _as_items(voice.get("examples") or voice.get("examples_good"))
        if examples:
            parts.append("Voice examples: " + " | ".join(examples[:3]))

        base = " ".join(parts) if parts else config.get("content_angle", "")

    try:
        ap_block = _audience_prompt_block(project.get("name") or "")
    except Exception:
        # Best effort: the audience block is optional prompt context.
        ap_block = ""
    if ap_block:
        return (base + "\n\n" + ap_block).strip() if base else ap_block.strip()
    return base
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
def build_discover_prompt(project, config, limit, top_report, recent_comments,
                          top_topics_report="", dud_queries_report="",
                          omitted_topics_report=""):
    """DISCOVER phase: scan-only. Model picks search queries, runs them in
    OPAQUE mode (never sees thread content), outputs DONE. No fetching, no
    judging, no drafting. The dump_dir harvest converts raw search results
    into candidates passed to ripen.

    Mirrors Twitter's scan phase: the only Claude work here is choosing
    search queries. Style picking, top_performers filtering, and the
    actual comment drafting all happen in the draft phase (the only
    Claude call in this cycle that writes a comment).

    Args:
        project: project dict; `name` and `description` are read here and the
            whole dict is passed to build_content_angle.
        config: global config dict, passed to build_content_angle.
        limit: accepted but not referenced in this function body.
        top_report: performance feedback text; only its first 20 lines are used.
        recent_comments: iterable of plain strings or (id, content) pairs.
        top_topics_report: optional text block of top-performing search topics.
        dud_queries_report: optional text; skipped when blank or "[]".
        omitted_topics_report: optional text; skipped when blank or "[]".

    Returns:
        The full prompt string for the discover phase.
    """
    content_angle = build_content_angle(project, config)
    topics_list = list(topics_for_project(project.get("name") or ""))
    project_json = json.dumps({
        "name": project.get("name"),
        "description": project.get("description"),
        "search_topics": topics_list,
    }, indent=2)

    recent_ctx = ""
    if recent_comments:
        # _recent_comment_text handles both legacy str and current (id, content) shapes.
        snippets = "\n".join(
            f"  - {_recent_comment_text(c)}"
            for c in recent_comments
            if _recent_comment_text(c)
        )
        # NOTE(review): the count below is len(recent_comments), which also
        # counts entries with empty text that were filtered out of `snippets`.
        recent_ctx = f"\nYour last {len(recent_comments)} comments (don't repeat these threads):\n{snippets}\n"

    top_ctx = ""
    if top_report:
        # Cap the feedback report at its first 20 lines.
        lines = top_report.split("\n")[:20]
        top_ctx = f"\n## Past performance feedback:\n{chr(10).join(lines)}\n"

    top_topics_ctx = ""
    if top_topics_report:
        top_topics_ctx = (
            "\n## Past top-performing search topics "
            "(sorted by clicks DESC first, then composite-scored: "
            "clicks*100 + comments + upvotes). "
            "CLICKS ARE THE PRIORITY SIGNAL. Any topic with `clicks > 0` is "
            "GOLD TIER, clicks are the only metric that proves our reply drove "
            "someone to actually visit the project's link. Comments and upvotes "
            "are vanity. If a project in your draft set has a gold-tier topic "
            "in this list, mimic ITS framing (subreddit fit, keyword cluster, "
            "specificity) FIRST before falling back to other styles. The "
            "Δpost / Δskip columns also matter: high Δskip + few posts = the "
            "topic surfaces alive but off-topic threads (reword more narrowly); "
            "low Δskip + few posts = dead supply (drop the topic). Optimize the "
            "entire pipeline for clicks; everything else is leading indicators.\n"
            f"{top_topics_report}\n"
        )

    dud_queries_ctx = ""
    # An empty JSON list ("[]") means there is nothing to show.
    if dud_queries_report and dud_queries_report.strip() not in ("[]", ""):
        dud_queries_ctx = f"\n## Dead queries (skip these exact phrasings):\n{dud_queries_report}\n"

    omitted_topics_ctx = ""
    if omitted_topics_report and omitted_topics_report.strip() not in ("[]", ""):
        omitted_topics_ctx = (
            "\n## Category-mismatch seeds (returned alive threads but the draft "
            "SELECTION GATE killed them — i.e. this seed surfaces wrong-audience "
            "subs; rephrase MORE NARROWLY around your project's actual domain, "
            "or drop the seed entirely):\n"
            f"{omitted_topics_report}\n"
        )

    # Search budget quoted in the prompt: the lower bound is 2 unless the
    # configured maximum is smaller.
    max_searches = MAX_DISCOVER_SEARCHES
    pick_low = min(2, max_searches)
    pick_high = max_searches

    return f"""You generate Reddit search queries. The search tool runs in OPAQUE mode this cycle: it dumps every returned thread to a side file for the ripen pipeline and prints back ONLY a one-line summary count. You do NOT see thread content, titles, scores, or URLs. You cannot filter results — the ripen step (numerical delta gate) is the only filter.

Topic area: {project_json}
Content angle: {content_angle}
{recent_ctx}{top_ctx}{top_topics_ctx}{omitted_topics_ctx}{dud_queries_ctx}
## Tool (via Bash)
- Search: python3 {REDDIT_TOOLS} search "QUERY" --limit 25
- Search by sub: python3 {REDDIT_TOOLS} search "QUERY" --subreddits AI_Agents,SaaS --time month
- Search broader time: python3 {REDDIT_TOOLS} search "QUERY" --time month

## What you'll see from the tool
- stdout: one short line, e.g. `OK: 23 threads passed to ripen pipeline (results not shown)`
- stderr: `[reddit_search] q="..." raw=25 returned=23 blocked_sub=2 archived=0 locked=0 too_old=0 already_posted_flagged=0 top_score=187 top_comments=48`

You can use these counts to decide whether to run another query. You CANNOT
read the threads themselves. They are already on disk for ripen.

## CRITICAL Bash rules
- NEVER use run_in_background=true. All commands run foreground.
- Run AT MOST {max_searches} searches total. Each search dumps up to 25 threads.
- Do NOT cat, ls, find, or otherwise inspect /tmp or any dump file. The dump
directory is private to the ripen step. You don't need to know the path.
- If rate-limited, stop. The ripen step uses whatever was dumped before the limit.

## Steps
1. Pick {pick_low}-{pick_high} concepts from the project's search_topics: {json.dumps(topics_list)}.
Rephrase each into a natural Reddit search query (vernacular, pain points).
Avoid the dud queries listed above. If a seed appears in the
"Category-mismatch seeds" section above, EITHER rephrase it MUCH more
narrowly (constrain to your project's exact audience/subreddit) OR skip
it and pick a different seed.
2. Run the searches. Watch the stdout/stderr summary for each call. Prefer
covering DIFFERENT angles across queries (e.g. don't run 5 near-duplicate
rephrasings of one seed).
3. (Optional) If a query returns `returned=0`, you may try ONE more rephrasing.
You may also stop early at {pick_low} if your queries returned plenty of
results — quality > quota. Never exceed {max_searches} total.
4. Output DONE on its own line.

## OUTPUT FORMAT
Just output `DONE` on its own line after running your searches. No JSON,
no candidate lines, no commentary about thread content (you don't see any).
"""
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
def build_draft_prompt(project, config, candidates, top_report, recent_comments,
                       style_assignment=None):
    """Assemble the DRAFT-phase prompt: comments only for ripen survivors.

    Args:
        project: Project mapping. Only ``name`` is read directly here; the
            whole mapping is also handed to ``build_content_angle``.
        config: Passed through to ``build_content_angle``.
        candidates: Decisions that passed the delta gate. Each is a mapping
            with a required ``thread_url`` and optional ``thread_title``,
            ``engagement_style``, ``thread_author`` and ``ripen`` (itself a
            mapping with ``delta_up``, ``delta_comments``, ``composite`` and
            ``window_sec``).
        top_report: Past-performance feedback text; only its first 20 lines
            are embedded. Falsy values omit the section.
        recent_comments: Recent comments, rendered one bullet each through
            ``_recent_comment_text`` (which accepts both the legacy ``str``
            shape and the current ``(id, content)`` shape). Falsy values
            omit the section.
        style_assignment: Added 2026-05-19. The pick_style_for_post() result
            the discover phase already wrote into the plan JSON, forwarded
            so the draft phase enforces the SAME style rather than letting
            the model free-pick. When omitted, ``get_styles_prompt`` picks
            fresh internally (legacy callers).

    Returns:
        The complete prompt string.
    """
    content_angle = build_content_angle(project, config)

    # Recent comments: one bullet per comment that renders to non-empty text.
    recent_ctx = ""
    if recent_comments:
        bullets = []
        for comment in recent_comments:
            if _recent_comment_text(comment):
                bullets.append(f" - {_recent_comment_text(comment)}")
        snippets = "\n".join(bullets)
        recent_ctx = f"\nYour last {len(recent_comments)} comments (don't repeat talking points):\n{snippets}\n"

    # Performance feedback is capped at 20 lines to keep the prompt short.
    top_ctx = ""
    if top_report:
        report_head = "\n".join(top_report.split("\n")[:20])
        top_ctx = f"\n## Past performance feedback:\n{report_head}\n"

    candidate_lines = []
    for cand in candidates:
        ripen = cand.get("ripen") or {}

        # Velocity annotation only exists when ripen produced a composite.
        if ripen.get("composite") is None:
            delta_info = ""
        else:
            delta_up = ripen.get("delta_up", 0)
            delta_comments = ripen.get("delta_comments", 0)
            composite = ripen.get("composite", 0)
            window_sec = ripen.get("window_sec", 300)
            delta_info = (
                f" [active: Δup={delta_up}, Δcomm={delta_comments},"
                f" composite={composite:.1f} over {window_sec}s]"
            )

        # Author history is best-effort: any failure just drops the extra lines.
        history_line = ""
        try:
            author_history = _render_author_history(
                "reddit", cand.get("thread_author") or "", days=30, limit=5
            )
            if author_history:
                history_line = "\n " + author_history.replace("\n", "\n ")
        except Exception:
            pass

        entry_parts = [
            f" - {cand['thread_url']}{delta_info}",
            f" title: {cand.get('thread_title', '')}",
            f" suggested style: {cand.get('engagement_style', '')}{history_line}",
        ]
        candidate_lines.append("\n".join(entry_parts))
    candidates_block = "\n".join(candidate_lines)

    return f"""You will be handed up to {len(candidates)} Reddit thread(s) that survived the engagement-velocity (ripen) gate. Your job is to draft comments for the ones where you can write something genuinely useful to that audience. Lean toward DRAFTING when the audience overlaps even partially with the project's user, and only OMIT on clear no-bridge cases.

Content angle: {content_angle}
{recent_ctx}{top_ctx}
## Candidate threads (post-ripen):
{candidates_block}

## SELECTION GATE — soft fits are OK; reject only clear mismatches

The ripen step proves a thread is alive (people are voting/commenting). It does NOT prove the thread fits the project. Reddit search returns false positives based on raw token overlap (e.g. a search for "no-code app maker" surfaces r/gamemaker shader threads because of the word "maker"; a search for "E2E testing developer productivity QA" can surface a JonBenet murder thread because of how Reddit indexes acronyms). The gate exists to catch those token-overlap false positives, NOT to demand a perfect product fit on every thread.

For each thread, ask the **bridge test**:
"Could a thoughtful person from {project.get('name', 'this project')}'s audience plausibly read my comment and find it useful, regardless of whether they ever try the product?"

DRAFT it if YES. OMIT only if NO bridge exists at all (clear off-topic / hostile audience / token-overlap false positive). Soft / partial / adjacent fits are GOOD enough — a useful comment in an adjacent sub builds reputation even when no one converts. Don't optimize for purity. Don't artificially cap output. The post-phase will cap actual posting at a reasonable number, so feel free to draft for any thread that passes the soft bridge test.

DRAFT THESE (broad, inclusive — not just direct hits):
- Project: AI test automation (Assrt). Thread: "Playwright selectors keep breaking on every refactor" → direct fit. DRAFT.
- Project: AI test automation. Thread: r/QualityAssurance "How are people handling flaky CI tests?" → adjacent topic, same audience. DRAFT.
- Project: AI app builder (mk0r). Thread: "I want to prototype a tip calculator without learning React" → direct fit. DRAFT.
- Project: AI app builder. Thread: r/SaaS "Indie hackers shipping MVPs in a weekend" → adjacent: same builder mindset. DRAFT (helpful comment about iteration speed).
- Project: study tool (Studyly). Thread: r/medschool "best way to handle 200-slide lectures" → direct fit. DRAFT.
- Project: study tool. Thread: r/GetStudying "I'm burnt out, can't retain anything" → adjacent: study-habit audience. DRAFT (empathetic comment about active recall, even if no product mention).
- Project: home security camera (Cyrano). Thread: r/HomeImprovement "wired vs wireless cameras" → direct fit. DRAFT.

OMIT THESE (clear no-bridge cases only):
- Project: AI test automation. Thread: r/JonBenet "The Absurdity of the BDI Theory" → token-overlap false positive (BDI ≠ a testing acronym here). 1996 murder case audience. NO bridge. OMIT.
- Project: AI app builder. Thread: r/BostonSocialClub "Events worth leaving the house for this weekend" → matched on "tried"/"maker". Locals planning weekends. NO bridge. OMIT.
- Project: AI app builder. Thread: r/gamemaker "Using surfaces to create paper-like behavior" → GameMaker is a code IDE, not a no-code generator. Audience writes GML shaders. NO bridge. OMIT.
- Project: study tool. Thread: r/SubredditDrama "the alternative option is still running" → meta drama, no study angle. OMIT.
- Project: study tool. Thread: r/trichotillomania "the trich trance" → medical condition, not studying. OMIT.
- Project: study tool. Thread where you've ALREADY commented under any of our accounts (`already_posted=true` or our usernames in the comment list): obvious astroturfing. OMIT.
- Any thread where you'd be embarrassed to have your comment shown next to a {project.get('name', 'this project')} link in the same Reddit thread.

## Tools (via Bash)
- Fetch thread: python3 {REDDIT_TOOLS} fetch "THREAD_URL"
- Do NOT run any searches. The threads are already chosen.

## CRITICAL Bash rules
- NEVER use run_in_background=true. All commands foreground.
- Fetch each thread once to read OP and top comments for context. Use the fetched content to apply the SELECTION GATE.

## CRITICAL CONTENT RULES (apply only to threads that pass the gate)
- Go BIMODAL on length: 1 punchy sentence (<100 chars) OR 4-5 sentences of real substance. Avoid 2-3 sentence middle-ground.
- GROUNDING RULE — pick ONE lane per comment:
LANE 1 - DISCLOSED STORY: open with a hedge ("hypothetically", "imagine someone running this", "scenario:") then you may invent specifics freely.
LANE 2 - NO FABRICATION: every specific (numbers, durations, places, tools) must appear verbatim in the content_angle above. Otherwise drop the specific and pattern-frame ("the part that breaks down is...", "the typical failure mode is...").
- VOICE RELATIONSHIP: see the dedicated section below; it governs whether you speak AS the maker or as an outside observer.
- NEVER mention product names (fazm, assrt, pieline, cyrano, terminator, mk0r, s4l).
- NEVER include URLs or links in your comment text.
- Prefer replying to OP (top-level reply). ONE comment per thread.
- Statements beat questions. Be authoritative, not inquisitive.

## Content rules
{get_content_rules("reddit")}

{get_styles_prompt("reddit", context="posting", assignment=style_assignment)}

{get_voice_relationship_rule()}

## OUTPUT FORMAT
For each thread that PASSES the SELECTION GATE, output one JSON object per line:
{{"action": "post", "thread_url": "SAME_URL_AS_GIVEN", "reply_to_url": null, "text": "your comment here", "thread_author": "username", "thread_title": "thread title", "engagement_style": "{(style_assignment or {}).get('style') or 'style_name'}", "search_topic": "the seed concept", "new_style": null}}

For threads that FAIL the gate, simply omit the post JSON above. The shell handles unhandled candidates correctly (Phase 0 salvage on the next cycle re-checks them, and one-strike ripen failure has already pruned dead threads).

## OPTIONAL: proposed_excludes (self-improving denylist)
When you OMIT a thread because of a recurring CLASS of false-positive (the SUB itself surfaces wrong-audience threads, not just this one thread), you MAY emit a second JSON line for that thread:

{{"action": "reject", "thread_url": "SAME_URL_AS_GIVEN", "reason": "short reason", "proposed_excludes": ["subreddit:bestofredditorupdates"]}}

Rules:
- proposed_excludes entries MUST use the typed form `subreddit:<slug>` (lowercase, no `r/` prefix). Future shape: `keyword:<word>` is accepted but unused today.
- DO emit when: the false-positive is structural — e.g. r/bestofredditorupdates is family drama matching on the word "alternative"; r/hfy is sci-fi narrative matching on the word "spaced"; r/superstonk is GME meme stock matching on "anki" via a random comment. The SUB is the false positive, not just this one post.
- DO NOT emit when: this specific thread is bad but the sub is fine in general (e.g. r/{project.get('name', 'project')}'s natural audience like r/medicalschool, r/anki, r/getstudying — never propose excluding a top-performing sub).
- Activation gate: a term needs >=2 SEPARATE batches to propose it before it goes live on future Reddit searches. A single mistaken proposal cannot mute a sub. Propose if a thoughtful future cycle would likely agree; otherwise omit.
- 1-3 entries per reject is plenty. When in doubt, omit the field. Default (no reject line) is safe.

Examples of GOOD proposals:
- Reject r/bestofredditorupdates "Husband lied" → ["subreddit:bestofredditorupdates"]
- Reject r/hfy "The Trial of Humanity" → ["subreddit:hfy"]
- Reject r/battlefield6 "GAME UPDATE 1.3.1.0" → ["subreddit:battlefield6"]
- Reject r/superstonk "GMERICA acquisition" → ["subreddit:superstonk"]
- Reject r/nosleep "cursed doll" → ["subreddit:nosleep"]

Examples of WRONG proposals (do not emit):
- Reject a specific r/nursing thread because OP is venting → DO NOT exclude r/nursing (it's our target audience; just omit this thread)
- Reject one r/anki thread that's off-topic → DO NOT exclude r/anki (core ICP)

Output DONE after all JSONs (both post and reject lines, in any order). Do NOT narrate. Fetch, gate, draft-or-reject, output JSONs, DONE.
"""
|
|
1171
|
+
|
|
1172
|
+
|
|
1173
|
+
def parse_candidates(output):
    """Pull the ``"action": "candidate"`` JSON objects out of discover output.

    Scans ``output`` for flat (brace-free inside) JSON objects whose
    ``action`` is ``candidate``. Objects that fail to parse, have an empty
    or missing ``thread_url``, or repeat a ``thread_url`` already seen are
    dropped. Survivors are returned in order of first appearance.
    """
    candidate_pattern = r'\{[^{}]*?"action"\s*:\s*"candidate"[^{}]*?\}'
    kept = []
    urls_taken = set()
    for raw_object in re.findall(candidate_pattern, output):
        try:
            decoded = json.loads(raw_object)
            thread_url = decoded.get("thread_url", "")
            # Guard clause: nothing to key on, or a repeat of an earlier thread.
            if not thread_url or thread_url in urls_taken:
                continue
            kept.append(decoded)
            urls_taken.add(thread_url)
        except (json.JSONDecodeError, TypeError):
            continue
    return kept
|
|
1187
|
+
|
|
1188
|
+
|
|
1189
|
+
def build_prompt(project, config, limit, top_report, recent_comments,
                 top_topics_report="", dud_queries_report="",
                 style_assignment=None):
    """Build prompt for Claude to search, evaluate, and draft replies (no posting).

    Args:
        project: Project mapping; ``name`` and ``description`` are read here
            and the whole mapping is handed to ``build_content_angle``.
        config: Passed through to ``build_content_angle``.
        limit: Number of threads the prompt asks the model to find and draft.
        top_report: Past-performance feedback text; only its first 30 lines
            are embedded. Falsy values omit the section.
        recent_comments: Recent comments, rendered one bullet each through
            ``_recent_comment_text``. Falsy values omit the section.
        top_topics_report: Pre-rendered report of top-performing search
            topics. Falsy values omit the section.
        dud_queries_report: A JSON list of recent zero-result queries for
            this project (see get_dud_reddit_queries). When non-empty, an
            anti-list block is inserted alongside the positive
            top_topics_report so the LLM is steered away from phrasings that
            have already proven flat in the last 7 days.
        style_assignment: Optional style assignment forwarded to
            ``get_styles_prompt``. The prompt body already referenced this
            name without the function defining it, which raises NameError
            when the f-string is evaluated; it is now an explicit keyword
            whose default of None lets ``get_styles_prompt`` pick a style
            itself.

    Returns:
        The complete prompt string.
    """
    content_angle = build_content_angle(project, config)

    # DB-backed search_topics (post 2026-05-27 config.json removal).
    topics_list = list(topics_for_project(project.get("name") or ""))

    project_json = json.dumps({
        "name": project.get("name"),
        "description": project.get("description"),
        "search_topics": topics_list,
    }, indent=2)

    recent_ctx = ""
    if recent_comments:
        # _recent_comment_text handles both legacy str and current (id, content) shapes.
        snippets = "\n".join(
            f" - {_recent_comment_text(c)}"
            for c in recent_comments
            if _recent_comment_text(c)
        )
        recent_ctx = f"""
Your last {len(recent_comments)} comments (don't repeat talking points):
{snippets}
"""

    top_ctx = ""
    if top_report:
        # Cap the feedback report at 30 lines.
        lines = top_report.split("\n")[:30]
        top_ctx = f"""
## Feedback from past performance:
{chr(10).join(lines)}
"""

    top_topics_ctx = ""
    if top_topics_report:
        top_topics_ctx = f"""
## Past top-performing search topics (sorted by clicks DESC first, then composite-scored: clicks*100 + comments + upvotes)
CLICKS ARE THE PRIORITY SIGNAL. Any topic with `clicks > 0` is GOLD TIER, clicks
are the only metric that proves our reply drove someone to actually visit the
project's link. Comments and upvotes are vanity. If a project in your draft set
has a gold-tier topic in this list, mimic ITS framing (subreddit fit, keyword
cluster, specificity) FIRST before falling back to other styles. The Δpost /
Δskip columns also matter: high Δskip + few posts = topic surfaces alive but
off-topic threads (reword more narrowly); low Δskip + few posts = dead supply
(drop the topic). Optimize the entire pipeline for clicks; everything else is
leading indicators.

{top_topics_report}

If none of the top topics match this run's angle, pick any seed from the
project's search_topics list. New topics with 0 clicks are fine — we still need
to explore — but a gold-tier topic that fits should beat any unproven topic.
"""

    # NEGATIVE-signal feedback: queries that have produced zero post-filter
    # candidates in the last 7 days. Mirrors twitter_search_attempts /
    # top_dud_twitter_queries.py but speaks in terms of (query, subreddits)
    # since Reddit search is sub-scoped. Keep this list short — Reddit is
    # more keyword-rigid than Twitter, so even "the same phrase but in a
    # different sub" can still produce results.
    dud_queries_ctx = ""
    if dud_queries_report and dud_queries_report.strip() not in ("[]", ""):
        dud_queries_ctx = f"""
## Dead queries (DO NOT redraft these — flat for the last 7 days):
{dud_queries_report}

Each entry is a (query, subreddits) phrasing that has returned ZERO usable
threads on every recent attempt. Pick fresh wording, a different angle, or a
different subreddit slate. Reusing an exact dead phrasing wastes a search
slot and burns rate-limit budget for no upside.
"""

    return f"""Find {limit} Reddit threads where you can add genuine value as someone with expertise in {project.get('name', 'general')}.

Topic area: {project_json}
Content angle: {content_angle}

Your role: You are a knowledgeable observer of this space. Your comments should be indistinguishable
from any other knowledgeable person in the conversation. NEVER mention product names, links, websites,
or GitHub repos. Product mentions happen ONLY later in the reply pipeline when people respond to you.

CRITICAL: every comment picks ONE of two lanes (see the GROUNDING RULE below).
LANE 1 - DISCLOSED STORY: open with a hedge ("hypothetically", "imagine someone running this",
"scenario:", "say a friend tried") and you may then invent any specifics.
LANE 2 - NO FABRICATION: every specific (numbers, durations, places, course names, headcount,
named tools) must appear verbatim in this project's content_angle / voice / messaging in
config.json. Otherwise drop the specific or pattern-frame ("the part that breaks down is...",
"the typical failure mode is...").
Never present an invented specific as a personal first-hand claim without a Lane 1 opener.
VOICE RELATIONSHIP: see the dedicated section below; it governs whether you speak AS the maker or as an outside observer.
{recent_ctx}{top_ctx}{top_topics_ctx}{dud_queries_ctx}
{get_styles_prompt("reddit", context="posting", assignment=style_assignment)}

{get_voice_relationship_rule()}

## Tools (via Bash) - ALWAYS foreground, NEVER run_in_background
- Search (global, by relevance): python3 {REDDIT_TOOLS} search "QUERY" --limit 15
- Search (scoped to specific subs): python3 {REDDIT_TOOLS} search "QUERY" --subreddits AI_Agents,SaaS,smallbusiness --time month
- Search (broader time range): python3 {REDDIT_TOOLS} search "QUERY" --time month
- Fetch thread: python3 {REDDIT_TOOLS} fetch "THREAD_URL"
- Check dedup: python3 {REDDIT_TOOLS} already-posted "THREAD_URL"

Search defaults to sort=relevance and time=week. Use --time month for broader results. Use --subreddits for targeted sub searches.

## Delta gating (new 2026-05-05)
Each thread in the search JSON now carries delta fields populated from a
persistent reddit_thread_snapshots table:
- sightings: how many search cycles have surfaced this exact thread
- delta_score: upvote change since first_seen_at
- delta_comments: comment change since first_seen_at
- delta_window_min: minutes between first_seen_at and now
- first_seen_at: when we first saw this thread

Use these to PREFER threads that are still picking up momentum since we last
saw them (positive delta_score with recent activity) over stale threads that
peaked hours ago. A thread with sightings>=2 and delta_score<=0 over 60+ min
is going cold; skip it for a fresher candidate.

## CRITICAL Bash rules
- NEVER use run_in_background=true. All bash commands must run foreground and return quickly (under 20s each).
- NEVER use `sleep` commands. NEVER run `sleep N && cat ...` to wait for background tasks.
- NEVER pipe multiple searches with `&` or `&&`. Run ONE search command at a time, wait for output, then decide next step.
- If you see `{{"error": "rate_limited", ...}}` in the output, DO NOT retry that command. Skip it and move on.
Rate limits are global. Waiting won't help this session. Use whatever search results you already have.
- If you can't find enough threads after 5 search attempts total, draft fewer posts (even 1-2 is fine) rather than searching more.

## CRITICAL CONTENT RULES
- Study the style performance data in the feedback report below. Pick styles with the highest avg upvotes.
- Go BIMODAL on length: either 1 punchy sentence (<100 chars) or 4-5 sentences of real substance. AVOID the 2-3 sentence middle.
- GROUNDING has TWO valid forms. Lane 1: open with a disclosure phrase ("hypothetically", "imagine someone running this", "scenario:") and then invent freely. Lane 2: every specific (numbers/places/programs) must be grounded in content_angle/voice/messaging in config.json, or drop the specific and pattern-frame ("the part that breaks down is...", "the typical failure mode is..."). Never present an invented specific as a personal first-hand claim without a Lane 1 opener.
- VOICE: see the VOICE RELATIONSHIP section below; it governs whether you speak AS the maker or as an outside observer based on the matched project's voice_relationship field.
- NEVER mention product names (fazm, assrt, pieline, cyrano, terminator, mk0r, s4l).
- NEVER include URLs or links.
- Prefer replying to OP (top-level reply).
- ONE comment per thread.
- Statements beat questions. Be authoritative, not inquisitive.

## Steps
1. Pick 2 concepts from the project's search_topics list: {json.dumps(topics_list)}.
These are shared concept seeds across platforms (Twitter, Reddit, GitHub, LinkedIn). Some
phrases are tuned for other platforms — rephrase each into natural Reddit search terms
(vernacular, problem-framing, pain points) before running the search. Skip already_posted=true threads.
2. Pick {limit} best threads where you have genuine expertise to contribute. Prefer replying to OP. Fetch each one.
3. Draft the comment following the CRITICAL CONTENT RULES above. Quality over quantity.
4. Output each as a JSON object, then DONE. Include the seed concept you used in "search_topic".

## Content rules
{get_content_rules("reddit")}

## CRITICAL OUTPUT FORMAT
You MUST output each draft as a raw JSON object on its own line. No commentary before or after. Example:

{{"action": "post", "thread_url": "https://old.reddit.com/r/sub/comments/abc/title/", "reply_to_url": null, "text": "your comment here", "thread_author": "username", "thread_title": "thread title", "engagement_style": "critic", "search_topic": "the seed concept you picked", "new_style": null}}

If, and ONLY if, none of the listed styles fits, you may invent one. Set "engagement_style" to your snake_case name AND replace `"new_style": null` with `{{"description": "...", "example": "...", "note": "...", "why_existing_didnt_fit": "..."}}`. Inventing should be rare; prefer an existing style if it's even 80% right.

After all {limit} JSON objects, output DONE on its own line.
Do NOT describe what you are doing. Do NOT narrate. Just search, draft, output JSON, DONE.
"""
|
|
1356
|
+
|
|
1357
|
+
|
|
1358
|
+
def run_claude(prompt, timeout=600):
    """Run ``claude -p`` with the Bash and Read tools and collect its output.

    The prompt is written to the child's stdin. stdout is consumed as
    stream-json, one event per line: tool calls, assistant text and
    ``[reddit_search]`` markers found in tool results are echoed to stderr
    in real time (picked up by tee in the shell wrapper), and every line is
    kept so the text blocks can be extracted once the child exits.

    Args:
        prompt: Full prompt text piped to the CLI.
        timeout: Overall budget in seconds. The child is killed and reaped
            when it is exceeded.

    Returns:
        ``(ok, output, usage)``. ``ok`` is True when the CLI exited with
        status 0. ``output`` is the text of all assistant/result blocks
        joined by newlines (the raw stdout if none parsed), followed by the
        child's stderr; it is ``"TIMEOUT"`` on timeout and the exception
        message on any other failure. ``usage`` carries the session id plus
        the token and cost figures taken from the ``result`` event.

    Side effects:
        Sets ``CLAUDE_SESSION_ID`` in this process's environment and runs
        ``scripts/log_claude_session.py`` after the child finishes.
    """
    import select
    import threading
    import time as _time

    usage = {"input_tokens": 0, "output_tokens": 0, "cache_read": 0, "cache_create": 0, "cost_usd": 0.0}
    session_id = str(uuid.uuid4())
    usage["session_id"] = session_id
    # Set in this process's env so subsequent log_post → reddit_tools.py inherits it
    os.environ["CLAUDE_SESSION_ID"] = session_id
    cmd = ["claude", "-p", "--session-id", session_id, "--output-format", "stream-json", "--verbose"]
    cmd += ["--tools", "Bash,Read"]
    env = os.environ.copy()
    env.pop("ANTHROPIC_API_KEY", None)  # ensure claude uses OAuth, not API key
    proc = None
    try:
        proc = subprocess.Popen(
            cmd, env=env, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
        )

        # Drain stderr concurrently. Leaving a PIPE unread until the child
        # exits lets it fill up and block the child on its next write.
        stderr_chunks = []

        def _drain_stderr():
            try:
                stderr_chunks.append(proc.stderr.read())
            except (OSError, ValueError):
                # Best-effort: the pipe went away underneath us; keep
                # whatever was captured so far.
                pass

        stderr_reader = threading.Thread(target=_drain_stderr, daemon=True)
        stderr_reader.start()

        proc.stdin.write(prompt)
        proc.stdin.close()
        collected = []
        # Monotonic clock so wall-clock adjustments cannot stretch or cut the budget.
        started = _time.monotonic()
        deadline = started + timeout
        while True:
            remaining = deadline - _time.monotonic()
            if remaining <= 0:
                proc.kill()  # reaped in the finally block below
                return False, "TIMEOUT", usage
            # Wake at least every 30s so a quiet child still produces a heartbeat line.
            ready, _, _ = select.select([proc.stdout], [], [], min(remaining, 30))
            if ready:
                line = proc.stdout.readline()
                if not line:
                    break
                collected.append(line)
                # Stream meaningful events to stderr so tee/log captures them
                try:
                    evt = json.loads(line.strip())
                    etype = evt.get("type", "")
                    if etype == "assistant":
                        msg = evt.get("message", {})
                        for block in msg.get("content", []):
                            if block.get("type") == "tool_use":
                                print(f"[post_reddit] tool: {block.get('name','')} | {str(block.get('input',{}).get('command',''))[:120]}", file=sys.stderr, flush=True)
                            elif block.get("type") == "text" and block.get("text","").strip():
                                txt = block["text"].strip()[:200]
                                print(f"[post_reddit] {txt}", file=sys.stderr, flush=True)
                    elif etype == "user":
                        # Tool results land in user messages. reddit_tools.py
                        # search emits a `[reddit_search] q=... raw=N returned=R`
                        # line on its own stderr, which Claude Code's Bash tool
                        # bundles into the tool_result content. Forward those
                        # markers into our log so enrichPostCommentsRedditRuns
                        # can derive raw/passed pills per run.
                        msg = evt.get("message", {})
                        for block in msg.get("content", []):
                            if block.get("type") != "tool_result":
                                continue
                            content = block.get("content", "")
                            if isinstance(content, list):
                                content = "".join(c.get("text","") for c in content if isinstance(c, dict))
                            for ln in str(content).splitlines():
                                if ln.startswith("[reddit_search]"):
                                    print(ln, file=sys.stderr, flush=True)
                    elif etype == "result":
                        print(f"[post_reddit] done: cost=${evt.get('total_cost_usd',0):.4f}", file=sys.stderr, flush=True)
                except (json.JSONDecodeError, TypeError, AttributeError):
                    # Not a JSON object of the expected shape (plain text, a
                    # bare JSON scalar, a non-dict block): echo it raw rather
                    # than aborting the whole run.
                    print(f"[post_reddit] {line.rstrip()[:200]}", file=sys.stderr, flush=True)
            elif proc.poll() is not None:
                # Process ended, read remaining. Split into lines so every
                # trailing event (notably `result`) is parsed individually.
                rest = proc.stdout.read()
                if rest:
                    collected.extend(rest.splitlines(keepends=True))
                break
            else:
                print(f"[post_reddit] ... still running ({int(_time.monotonic() - started)}s)", file=sys.stderr, flush=True)
        # stdout reached EOF, but the child may still be running; keep
        # honouring the overall budget instead of waiting forever.
        try:
            proc.wait(timeout=max(deadline - _time.monotonic(), 1))
        except subprocess.TimeoutExpired:
            proc.kill()  # reaped in the finally block below
            return False, "TIMEOUT", usage
        # Parse stream-json: collect ALL text blocks (not just the final result)
        # JSON post decisions can appear in any assistant message, not just the last one
        all_text_parts = []
        for line_str in collected:
            line_str = line_str.strip()
            if not line_str:
                continue
            try:
                event = json.loads(line_str)
                etype = event.get("type", "")
                if etype == "assistant":
                    for block in event.get("message", {}).get("content", []):
                        if block.get("type") == "text":
                            text = block.get("text")
                            # Only strings can be joined below.
                            if isinstance(text, str):
                                all_text_parts.append(text)
                elif etype == "result":
                    result_text = event.get("result")
                    if isinstance(result_text, str) and result_text:
                        all_text_parts.append(result_text)
                    usage["cost_usd"] = event.get("total_cost_usd", 0.0)
                    u = event.get("usage", {})
                    usage["input_tokens"] = u.get("input_tokens", 0)
                    usage["output_tokens"] = u.get("output_tokens", 0)
                    usage["cache_read"] = u.get("cache_read_input_tokens", 0)
                    usage["cache_create"] = u.get("cache_creation_input_tokens", 0)
            except (json.JSONDecodeError, TypeError, AttributeError):
                pass
        text_output = "\n".join(all_text_parts) if all_text_parts else "".join(collected)
        # Bounded join: a grandchild holding the pipe open must not hang us.
        stderr_reader.join(timeout=5)
        stderr_out = "".join(chunk for chunk in stderr_chunks if chunk)
        try:
            log_args = [PYTHON, os.path.join(REPO_DIR, "scripts", "log_claude_session.py"),
                        "--session-id", session_id, "--script", "post_reddit"]
            orch_cost = usage.get("cost_usd")
            if isinstance(orch_cost, (int, float)) and orch_cost > 0:
                log_args.extend(["--orchestrator-cost-usd", str(orch_cost)])
            subprocess.run(log_args, capture_output=True, text=True, timeout=30)
        except Exception as e:
            print(f"[post_reddit] WARNING: log_claude_session failed: {e}", file=sys.stderr)
        return proc.returncode == 0, text_output + stderr_out, usage
    except Exception as e:
        return False, str(e), usage
    finally:
        # Never leave the child running or unreaped, whichever path exits.
        if proc is not None:
            if proc.poll() is None:
                proc.kill()
            proc.wait()
|
|
1476
|
+
|
|
1477
|
+
|
|
1478
|
+
def _acquire_browser_lease(timeout: int = 600, ttl: int = 90):
    """Take the reddit-browser lease for a single row's CDP work.

    The lease is acquired per post rather than per cycle or per phase
    (migration shipped 2026-05-13). Previously run-reddit-search.sh held it
    around the whole `--phase post` invocation, so a 10-row salvage batch
    kept the browser for ~30 min (10 x ~45s post + 9 x 180s between-post
    sleep) while peers (link-edit-reddit, dm-outreach-reddit, engage-reddit,
    engage-dm-replies-reddit) waited. With per-row acquire/release the lease
    is held only during the actual posting work (~45s incl. retries) and the
    3-min sleeps between posts run unlocked.

    No manual heartbeat is issued here: the MCP wrapper's auto-heartbeat
    (PreToolUse/PostToolUse hooks bumping `expires_at`) keeps the lease alive
    while the browser is really in use. The 90s default TTL leaves headroom
    for post_via_cdp's 5-attempt retry loop and its internal sleeps.

    Args:
        timeout: seconds forwarded to the lock helper as `--timeout`; the
            subprocess itself is given `timeout + 30` seconds.
        ttl: seconds forwarded to the lock helper as `--ttl`.

    Returns:
        (ok, msg). On success msg is the helper's last non-empty stdout line
        (which starts with "OK"). On failure msg is that last line, else the
        first 200 chars of stderr, else "rc=<code>"; "subprocess_timeout" or
        "exception:<err>" when the helper could not be run to completion.
    """
    try:
        lock_cmd = [
            PYTHON, REDDIT_BROWSER_LOCK, "acquire",
            "--timeout", str(timeout),
            "--ttl", str(ttl),
        ]
        completed = subprocess.run(
            lock_cmd,
            capture_output=True,
            text=True,
            timeout=timeout + 30,
        )
        # The helper's verdict is its last non-empty stdout line.
        final_line = ""
        for candidate in reversed((completed.stdout or "").strip().splitlines()):
            if candidate:
                final_line = candidate
                break
        if completed.returncode == 0 and final_line.startswith("OK"):
            return True, final_line
        if final_line:
            return False, final_line
        stderr_snippet = (completed.stderr or "").strip()[:200]
        if stderr_snippet:
            return False, stderr_snippet
        return False, f"rc={completed.returncode}"
    except subprocess.TimeoutExpired:
        return False, "subprocess_timeout"
    except Exception as e:
        return False, f"exception:{e}"
|
|
1514
|
+
|
|
1515
|
+
|
|
1516
|
+
def _release_browser_lease() -> None:
    """Give the reddit-browser lease back. Safe to call repeatedly.

    A NOT_HELD answer from the helper is fine, and any failure to run the
    helper is ignored. Callers invoke this from a `finally` so peers can
    acquire during the 3-min between-post sleep even when post_via_cdp
    raised. The lease would decay on its own after 90s of idleness (no MCP
    heartbeats while sleeping), but an explicit release frees peers
    immediately.
    """
    release_cmd = [PYTHON, REDDIT_BROWSER_LOCK, "release"]
    try:
        subprocess.run(release_cmd, capture_output=True, text=True, timeout=10)
    except Exception:
        # Best-effort: the TTL reclaims the lease if this call did not.
        pass
|
|
1531
|
+
|
|
1532
|
+
|
|
1533
|
+
def post_via_cdp(thread_url, reply_to_url, text):
    """Post a top-level comment or a reply through the CDP browser helper.

    Runs REDDIT_BROWSER with `reply` (when `reply_to_url` is set) or
    `post-comment` (otherwise) and parses the JSON object it prints on
    stdout. Up to 5 attempts are made with lock-aware backoff: lock
    contention (engage.sh or another reddit-agent session mid-work) waits
    longer because those sessions have natural gaps every 20-60s between
    replies; other errors use a short retry for transient network issues.

    Args:
        thread_url: thread to comment on when `reply_to_url` is falsy.
        reply_to_url: comment permalink to reply to, or a falsy value.
        text: comment body passed to the helper as one argv element.

    Returns:
        The helper's parsed JSON dict on success (`ok` truthy) or on a
        non-retryable error; otherwise
        {"ok": False, "error": "all_attempts_failed"}.
    """
    MAX_ATTEMPTS = 5
    # Errors that another attempt cannot change.
    non_retryable = (
        "thread_not_found", "thread_locked", "thread_archived",
        "already_replied", "not_logged_in", "account_blocked_in_sub",
    )
    # Increasing waits to catch a gap between a peer session's reply drafts.
    lock_waits = [20, 35, 50, 60]

    for attempt in range(MAX_ATTEMPTS):
        is_last = attempt >= MAX_ATTEMPTS - 1
        try:
            target = reply_to_url or thread_url
            cmd = [PYTHON, REDDIT_BROWSER, "reply" if reply_to_url else "post-comment", target, text]
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
            cdp_out = (proc.stdout or "").strip()
            if not cdp_out:
                # Full stderr (was [:200] until 2026-05-14; truncation hid the
                # actual exception class/message, leaving cdp_no_response
                # failures undiagnosable in postmortems).
                _stderr_full = proc.stderr or ""
                print(f"[post_reddit] CDP attempt {attempt + 1}: no stdout. stderr:\n{_stderr_full}")
                if not is_last:
                    time.sleep(10)
                continue

            result = json.loads(cdp_out)
            if not isinstance(result, dict):
                # Valid JSON but not the expected object: handle it like a
                # parse failure rather than crashing on `.get`.
                print(f"[post_reddit] CDP attempt {attempt + 1} exception: "
                      f"unexpected non-object JSON: {cdp_out[:200]!r}")
                if not is_last:
                    time.sleep(10)
                continue
            if result.get("ok"):
                return result

            # `error` may be missing, null or a non-string; normalise it so
            # the membership test and `.lower()` below cannot raise.
            raw_err = result.get("error")
            err = raw_err if isinstance(raw_err, str) else (str(raw_err) if raw_err else "unknown")
            print(f"[post_reddit] CDP attempt {attempt + 1}: {err}")
            if err in non_retryable:
                return result  # Don't retry these

            if "locked by session" in err.lower():
                # Another reddit-agent session is actively working.
                if not is_last:
                    wait = lock_waits[attempt]
                    print(f"[post_reddit] CDP waiting {wait}s for browser lock to free...")
                    time.sleep(wait)
                continue

            # Any other error: short sleep then retry
            if not is_last:
                time.sleep(5)
        except (subprocess.TimeoutExpired, subprocess.CalledProcessError, json.JSONDecodeError) as e:
            print(f"[post_reddit] CDP attempt {attempt + 1} exception: {e}")
            if not is_last:
                time.sleep(10)
    return {"ok": False, "error": "all_attempts_failed"}
|
|
1579
|
+
|
|
1580
|
+
|
|
1581
|
+
def log_post(thread_url, permalink, text, project_name, thread_author, thread_title, reddit_username, engagement_style=None, search_topic=None, generation_trace_path=None, link_source=None):
    """Log a successful post to the database. Returns the new post_id, or None.

    Runs `REDDIT_TOOLS log-post ...` as a subprocess (15s timeout) and reads
    `post_id` from the JSON object it prints on stdout. Any failure is
    reported with a WARNING line and yields None; nothing is raised.

    generation_trace_path (2026-05-12): optional path to a JSON file with
    the few-shot context Claude saw before drafting (top_performers
    report, recent comments, top_search_topics). Forwarded to
    reddit_tools.py as --generation-trace and stored in
    posts.generation_trace JSONB. File-based (not inline) to keep argv
    short. Same trace blob is reused for every post produced from this
    Claude draft, since they all share the same few-shot context.

    link_source (2026-05-17): optional string written to posts.link_source so
    the dashboard can break out audience-page traffic (e.g.
    'audience_page:founder-ghostwriting') from generic homepage links. Set by
    the post loop after URL wrapping based on which curated landing page
    (if any) Claude baked into the reply text.
    """
    try:
        cmd = [PYTHON, REDDIT_TOOLS, "log-post",
               thread_url, permalink or "", text, project_name,
               thread_author, thread_title,
               "--account", reddit_username]
        # Optional flags are appended only when a truthy value was supplied.
        optional_flags = (
            ("--engagement-style", engagement_style),
            ("--search-topic", search_topic),
            ("--generation-trace", generation_trace_path),
            ("--link-source", link_source),
        )
        for flag, value in optional_flags:
            if value:
                cmd.extend([flag, value])
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
    except Exception as e:
        print(f"[post_reddit] WARNING: log-post failed: {e}")
        return None

    try:
        payload = json.loads((result.stdout or "").strip())
        return payload.get("post_id")
    except (json.JSONDecodeError, AttributeError, TypeError):
        # The comment is already live at this point; without this line a
        # failed DB write would leave no trace in the run log.
        stderr_tail = (result.stderr or "").strip()[-500:]
        print(f"[post_reddit] WARNING: log-post returned no post_id "
              f"(rc={result.returncode}): {stderr_tail}")
        return None
|
|
1620
|
+
|
|
1621
|
+
|
|
1622
|
+
def bump_campaigns(table, row_id, campaign_ids):
    """Attach a row in {posts,replies,dm_messages} to its applied campaigns.

    Invokes scripts/campaign_bump.py once per campaign id (15s timeout
    each). Does nothing when `row_id` or `campaign_ids` is empty/falsy. A
    failure to run the script for one campaign is printed as a WARNING and
    the remaining campaigns are still processed.
    """
    if not (row_id and campaign_ids):
        return
    script_path = os.path.join(REPO_DIR, "scripts", "campaign_bump.py")
    row_arg = str(row_id)
    for cid in campaign_ids:
        argv = [
            PYTHON, script_path,
            "--table", table,
            "--id", row_arg,
            "--campaign-id", str(cid),
        ]
        try:
            subprocess.run(argv, capture_output=True, text=True, timeout=15)
        except Exception as e:
            print(f"[post_reddit] WARNING: campaign_bump failed (id={row_id} c={cid}): {e}")
|
|
1636
|
+
|
|
1637
|
+
|
|
1638
|
+
def _iter_action_objects(output, action):
    """Yield every JSON object embedded in `output` whose "action" equals `action`.

    Each `{` in the text is tried as the start of a JSON document with
    `json.JSONDecoder.raw_decode`. Unlike a `[^{}]` regex this handles braces
    inside string values, nested objects/arrays and pretty-printed
    (multi-line) objects. Positions that do not decode are skipped. Objects
    that decode but carry a different action are stepped *into* rather than
    over, so a matching object nested inside a wrapper is still found.
    """
    if not output:
        return
    decoder = json.JSONDecoder()
    pos = output.find("{")
    while pos != -1:
        resume_at = pos + 1
        try:
            obj, end = decoder.raw_decode(output, pos)
        except ValueError:  # json.JSONDecodeError subclasses ValueError
            obj = None
        if isinstance(obj, dict) and obj.get("action") == action:
            yield obj
            resume_at = end  # never rescan inside an accepted object
        pos = output.find("{", resume_at)


def parse_post_decisions(output):
    """Extract JSON post decisions from Claude's output, deduplicated by thread_url.

    A decision is kept when it is a JSON object with `"action": "post"`, a
    truthy `text` and a truthy `thread_url` not seen earlier in `output`
    (first occurrence wins). Text that does not parse as JSON is ignored.

    Returns:
        list of decision dicts in order of appearance.
    """
    decisions = []
    seen_urls = set()
    for decision in _iter_action_objects(output, "post"):
        url = decision.get("thread_url", "")
        try:
            if decision.get("text") and url and url not in seen_urls:
                decisions.append(decision)
                seen_urls.add(url)
        except TypeError:
            # Unhashable thread_url (e.g. a list): not a usable decision.
            continue
    return decisions


def parse_reject_decisions(output):
    """Extract action='reject' JSON objects from the draft prompt output (2026-05-11).

    Reject objects may carry a `proposed_excludes` array of typed exclude terms
    (`subreddit:<slug>` or `keyword:<word>`). These get fed to
    project_excludes.propose() so the 2-batch activation gate accumulates
    them without auto-trusting a single false rejection. The "thread itself
    is bad" reasons (no proposed_excludes) are still parsed for audit but
    have no side effect on the denylist.

    Objects may span lines (Claude may pretty-print the array) and their
    string values may contain braces. Anything that does not parse as a JSON
    object with `"action": "reject"` is silently ignored, as are rejects
    without a `thread_url` or repeating an earlier one.

    Returns:
        list of reject dicts in order of appearance, one per thread_url.
    """
    rejects = []
    seen_urls = set()
    for reject in _iter_action_objects(output, "reject"):
        url = reject.get("thread_url", "")
        try:
            if not url or url in seen_urls:
                continue
            rejects.append(reject)
            seen_urls.add(url)
        except TypeError:
            # Unhashable thread_url: skip rather than abort the parse.
            continue
    return rejects
|
|
1685
|
+
|
|
1686
|
+
|
|
1687
|
+
def _propose_excludes_from_rejects(rejects, project_name, batch_id, candidates_by_url):
|
|
1688
|
+
"""Forward Claude-proposed excludes into project_search_excludes (reddit).
|
|
1689
|
+
|
|
1690
|
+
Mirrors the twitter cycle's behavior at run-twitter-cycle.sh:929-966:
|
|
1691
|
+
each proposed term is normalize/validated by project_excludes.propose()
|
|
1692
|
+
against the platform's allowed kinds and the project's reserved-keyword
|
|
1693
|
+
list. The activation gate (>=2 distinct batch_ids) is enforced inside
|
|
1694
|
+
propose(); a single false-rejection in this cycle cannot mute a sub.
|
|
1695
|
+
|
|
1696
|
+
Best-effort: import / DB failures are logged once and the post pipeline
|
|
1697
|
+
continues. The propose() side effect is not on the critical path for
|
|
1698
|
+
posting; if it dies, the only consequence is that we don't accumulate
|
|
1699
|
+
new exclude proposals this cycle.
|
|
1700
|
+
|
|
1701
|
+
Returns a dict with counters for logging.
|
|
1702
|
+
"""
|
|
1703
|
+
if not rejects or not project_name:
|
|
1704
|
+
return {"rejects_seen": len(rejects or []), "proposed": 0,
|
|
1705
|
+
"inserted": 0, "bumped": 0, "rejected": 0, "active_now": 0}
|
|
1706
|
+
counters = {"rejects_seen": len(rejects), "proposed": 0,
|
|
1707
|
+
"inserted": 0, "bumped": 0, "rejected": 0, "active_now": 0}
|
|
1708
|
+
try:
|
|
1709
|
+
scripts_dir = os.path.dirname(os.path.abspath(__file__))
|
|
1710
|
+
if scripts_dir not in sys.path:
|
|
1711
|
+
sys.path.insert(0, scripts_dir)
|
|
1712
|
+
import project_excludes as pe
|
|
1713
|
+
except Exception as e:
|
|
1714
|
+
print(f"[post_reddit] WARN: project_excludes import failed: {e}",
|
|
1715
|
+
file=sys.stderr, flush=True)
|
|
1716
|
+
return counters
|
|
1717
|
+
|
|
1718
|
+
for r in rejects:
|
|
1719
|
+
url = r.get("thread_url") or ""
|
|
1720
|
+
terms = r.get("proposed_excludes") or []
|
|
1721
|
+
if not isinstance(terms, list):
|
|
1722
|
+
continue
|
|
1723
|
+
reason = (r.get("reason") or "")[:500]
|
|
1724
|
+
cand = candidates_by_url.get(url) or {}
|
|
1725
|
+
# candidate_id is the reddit_candidates.id for audit purposes; falls
|
|
1726
|
+
# back to None when the candidate object doesn't carry it through.
|
|
1727
|
+
cand_id = cand.get("id") or cand.get("candidate_id")
|
|
1728
|
+
for t in terms[:5]: # cap so a runaway prompt can't spam the table
|
|
1729
|
+
counters["proposed"] += 1
|
|
1730
|
+
try:
|
|
1731
|
+
out = pe.propose(
|
|
1732
|
+
"reddit", project_name, t,
|
|
1733
|
+
candidate_id=cand_id,
|
|
1734
|
+
batch_id=batch_id,
|
|
1735
|
+
reason=reason or None,
|
|
1736
|
+
)
|
|
1737
|
+
except Exception as e:
|
|
1738
|
+
print(f"[post_reddit] WARN: propose failed term={t!r}: {e}",
|
|
1739
|
+
file=sys.stderr, flush=True)
|
|
1740
|
+
counters["rejected"] += 1
|
|
1741
|
+
continue
|
|
1742
|
+
action = out.get("action") or ""
|
|
1743
|
+
if not out.get("ok"):
|
|
1744
|
+
counters["rejected"] += 1
|
|
1745
|
+
elif action == "inserted":
|
|
1746
|
+
counters["inserted"] += 1
|
|
1747
|
+
elif action in ("bumped", "duplicate_batch"):
|
|
1748
|
+
counters["bumped"] += 1
|
|
1749
|
+
if out.get("active"):
|
|
1750
|
+
counters["active_now"] += 1
|
|
1751
|
+
return counters
|
|
1752
|
+
|
|
1753
|
+
|
|
1754
|
+
# Stopwords stripped before computing query<->thread topical overlap. Kept small
|
|
1755
|
+
# and generic: these are the high-frequency English glue words that cause the
|
|
1756
|
+
# Reddit `sort=relevance` leak (a chatty natural-language query like "claude
|
|
1757
|
+
# artifacts built me a little tool to track my habits" matches an unrelated BORU
|
|
1758
|
+
# thread purely on shared words like "me", "to", "my", "a", "little"). We do NOT
|
|
1759
|
+
# strip domain words here — only structural filler — so the surviving overlap is
|
|
1760
|
+
# a real topical signal.
|
|
1761
|
+
_OVERLAP_STOPWORDS = frozenset("""
|
|
1762
|
+
a an the and or but if then else of to in on at for with without from by about into
|
|
1763
|
+
over under again further is are was were be been being am do does did doing have has
|
|
1764
|
+
had having i me my myself we our ours you your yours he him his she her it its they
|
|
1765
|
+
them their this that these those what which who whom whose how when where why all any
|
|
1766
|
+
both each few more most other some such no nor not only own same so than too very can
|
|
1767
|
+
will just dont don't should now get got make made want need like really actually
|
|
1768
|
+
something someone anyone everyone thing things stuff lot lots little bit kind sort
|
|
1769
|
+
""".split())
|
|
1770
|
+
|
|
1771
|
+
# Token must be >=3 chars to count toward overlap (drops "ai" etc.? no — keep 2+
|
|
1772
|
+
# but exclude pure stopwords). We use 2 to keep short domain tokens like "db", "os".
|
|
1773
|
+
_OVERLAP_MIN_LEN = 2
|
|
1774
|
+
|
|
1775
|
+
|
|
1776
|
+
def _overlap_tokens(text):
|
|
1777
|
+
"""Lowercase alphanumeric tokens of length >= _OVERLAP_MIN_LEN, minus stopwords."""
|
|
1778
|
+
if not text:
|
|
1779
|
+
return set()
|
|
1780
|
+
toks = re.findall(r"[a-z0-9]+", text.lower())
|
|
1781
|
+
return {t for t in toks if len(t) >= _OVERLAP_MIN_LEN and t not in _OVERLAP_STOPWORDS}
|
|
1782
|
+
|
|
1783
|
+
|
|
1784
|
+
def _topical_overlap(query, title, selftext):
    """Share of the query's distinct content tokens found in the thread.

    The thread side is the token set of `title` and `selftext` joined by a
    space (None is treated as empty). The result is in [0.0, 1.0]: 0.0 means
    no shared content token (likely relevance-sort garbage) or that either
    side has no content tokens at all; 1.0 means every query content word
    appears in the thread.

    This is a *soft signal* used only to rank/prioritize candidates, never
    to hard-drop them — per the conservative directive, garbage is isolated
    and surfaced rather than silently filtered.
    """
    query_tokens = _overlap_tokens(query)
    if query_tokens:
        thread_text = " ".join((title or "", selftext or ""))
        thread_tokens = _overlap_tokens(thread_text)
        if thread_tokens:
            shared = query_tokens.intersection(thread_tokens)
            return len(shared) / len(query_tokens)
    return 0.0
|
|
1800
|
+
|
|
1801
|
+
|
|
1802
|
+
def _discover_run_searches(queries, limit):
    """Run every query through reddit_tools.cmd_search; return how many completed.

    cmd_search is expected to write its result-*.json files into the dir
    named by S4L_REDDIT_DUMP_DIR (set by the caller). A SystemExit stops the
    loop; any other exception skips just that query.
    """
    import types as _types
    import reddit_tools as _rt

    searches_ok = 0
    for q in queries:
        sargs = _types.SimpleNamespace(
            query=q,
            limit=limit,
            sort="relevance",
            time="week",
            subreddits=None,
        )
        try:
            _rt.cmd_search(sargs)  # writes result-*.json into dump_dir
            searches_ok += 1
        except SystemExit as se:
            # cmd_search may sys.exit on a hard rate-limit / stop. Halt the
            # loop but KEEP whatever already dumped (harvested by the caller).
            print(f"[post_reddit] discover search halted on {q!r}: "
                  f"SystemExit({getattr(se, 'code', '?')})")
            break
        except Exception as e:
            # One bad query (transient 500, parse error) must not kill the
            # whole discover. Skip it and continue with the rest of the bank.
            print(f"[post_reddit] discover search failed for {q!r}: {e}",
                  file=sys.stderr)
    return searches_ok


def _discover_harvest_dump(dump_dir):
    """Read every result-*.json in `dump_dir` into a URL-deduplicated candidate list."""
    import glob as _glob

    candidates = []
    seen_urls = set()
    dump_files = sorted(_glob.glob(os.path.join(dump_dir, "result-*.json")))
    print(f"[post_reddit] Discover dump dir contains {len(dump_files)} file(s)")
    for dump_path in dump_files:
        try:
            with open(dump_path) as df:
                payload = json.load(df)
        except Exception as e:
            print(f"[post_reddit] WARN: skipping unreadable dump {dump_path}: {e}",
                  file=sys.stderr)
            continue
        if not isinstance(payload, dict):
            # Valid JSON of the wrong shape: skip it like an unreadable file.
            print(f"[post_reddit] WARN: skipping unreadable dump {dump_path}: "
                  f"expected a JSON object", file=sys.stderr)
            continue
        query = payload.get("query") or ""
        for t in payload.get("threads") or []:
            if not isinstance(t, dict):
                continue
            url = t.get("url") or ""
            if not url or url in seen_urls:
                continue
            seen_urls.add(url)
            candidates.append({
                "action": "candidate",
                "thread_url": url,
                "thread_title": t.get("title") or "",
                "thread_author": t.get("author") or "",
                "selftext": t.get("selftext") or "",  # captured for analytics + future relevance gates
                "score": int(t.get("score") or 0),
                "num_comments": int(t.get("num_comments") or 0),
                "search_topic": query,
            })
    return candidates


def _discover_iteration(args, config, reddit_username, already_picked):
    """DISCOVER phase: search and select threads. No drafting.

    As of 2026-06-01 discover is fully programmatic: Python builds the query
    bank and runs reddit_tools.cmd_search directly, with no Claude session,
    so `cost` is always 0.0.

    Args:
        args: parsed CLI namespace; reads `project`, `dry_run`, `limit` and
            optionally `batch_id`.
        config: loaded config dict; `projects` is searched when
            `args.project` is given.
        reddit_username: not referenced in this function body.
        already_picked: projects to exclude when picking one automatically.

    Returns:
        None when no project can be resolved. Otherwise a dict with
        `project_name`, `decisions` and `cost`:
        - normal run: `decisions` is the ranked, capped candidate list (each
          with thread_url, thread_title, thread_author, selftext, score,
          num_comments, search_topic, topical_overlap, velocity and NO text
          field), plus `session_id` None and `phase` "discover";
        - dry run: empty `decisions` and `dry_run` True;
        - empty query bank / total search failure: empty `decisions` and
          `error` "no_queries" / "no_search_results".
        The `decisions` key name is kept for downstream-phase compatibility.
    """
    if args.project:
        project = None
        for p in config.get("projects", []):
            if p["name"].lower() == args.project.lower():
                project = p
                break
        if not project:
            print(f"[post_reddit] ERROR: project '{args.project}' not found")
            return None
    else:
        project = pick_project("reddit", exclude=already_picked)
        if not project:
            print(f"[post_reddit] No eligible project left (already picked: {already_picked})")
            return None

    project_name = project.get("name", "general")
    print(f"[post_reddit] Project: {project_name}")

    # 2026-05-11: surface the per-project sub denylist for visibility in run
    # logs (twitter cycle does the equivalent at run-twitter-cycle.sh:410).
    # The actual *enforcement* happens server-side in reddit_tools._load_
    # comment_blocked_subs via the S4L_REDDIT_PROJECT env var set below.
    # mark_used stamps last_used_at on every active term so decay (60d
    # unused → prune) only fires on terms that truly stopped contributing.
    try:
        scripts_dir = os.path.dirname(os.path.abspath(__file__))
        if scripts_dir not in sys.path:
            sys.path.insert(0, scripts_dir)
        import project_excludes as _pe
        _split = _pe.active_excludes_by_kind("reddit", project_name)
        _active_subs = _split.get("subreddit") or []
        _active_kws = _split.get("keyword") or []
        if _active_subs or _active_kws:
            _sub_preview = ",".join(_active_subs[:8]) + ("..." if len(_active_subs) > 8 else "")
            _kw_preview = ",".join(_active_kws[:8]) + ("..." if len(_active_kws) > 8 else "")
            print(
                f"[project_excludes] platform=reddit project={project_name} "
                f"active_subs={len(_active_subs)} active_keywords={len(_active_kws)} "
                f"subs=[{_sub_preview}] keywords=[{_kw_preview}]"
            )
            # Stamp last_used_at so decay doesn't prune still-live terms.
            # mark_used wants the FULL typed-term form (subreddit:foo).
            _full_terms = (
                [f"subreddit:{s}" for s in _active_subs]
                + [f"keyword:{k}" for k in _active_kws]
            )
            try:
                _pe.mark_used("reddit", project_name, _full_terms)
            except Exception as e:
                print(f"[project_excludes] WARN: mark_used failed: {e}", file=sys.stderr)
    except Exception as e:
        # Visibility-only path. Never fail discover because of it.
        print(f"[project_excludes] WARN: active-excludes log failed: {e}", file=sys.stderr)

    # 2026-06-01: discover is now FULLY PROGRAMMATIC (no Claude session).
    # Previously discover burned an entire Claude session in OPAQUE mode just
    # to pick query phrasings and fire reddit_tools.py search calls whose
    # results Claude never even saw (the dump_dir harvest below is what
    # actually feeds candidates). Query selection + search execution are both
    # deterministic, so we now build the query bank in Python (mirroring the
    # Twitter cycle: scan = deterministic Python, Claude only enters at draft)
    # and run each search via reddit_tools.cmd_search directly. The picker
    # (engagement style) still fires once at the start of the draft phase —
    # the only Claude call that actually writes a comment.
    #
    # reddit_query_bank pulls proven query phrasings from
    # /api/v1/search-topics/ranked?platform=reddit (on Reddit the harvested
    # search_topic IS the raw query string) ranked clicks-first, then appends
    # config.json seeds for cold-start coverage, deduped by normalized core.
    import reddit_query_bank as _rqb
    max_searches = int(os.environ.get("S4L_REDDIT_MAX_SEARCHES", "6") or "6")
    bank = _rqb.build_bank(project_name, limit=max_searches)
    queries = [(b.get("query") or "").strip() for b in bank if (b.get("query") or "").strip()]
    n_proven = sum(1 for b in bank if b.get("source") == "proven")
    n_seed = len(bank) - n_proven
    print(f"[discover_bank] project={project_name} queries={len(queries)} "
          f"proven={n_proven} seed={n_seed} cap={max_searches} :: {queries}")

    if args.dry_run:
        print(f"=== DRY RUN discover (project={project_name}) ===")
        for i, q in enumerate(queries, 1):
            print(f" {i}. {q}")
        print("=== END DRY RUN ===")
        return {"project_name": project_name, "decisions": [], "cost": 0.0, "dry_run": True}

    if not queries:
        print(f"[post_reddit] discover: no queries for project={project_name} "
              f"(empty bank: no proven queries and no config seeds)")
        return {"project_name": project_name, "decisions": [], "cost": 0.0,
                "error": "no_queries"}

    plan_batch_id = f"reddit-discover-{project_name}-{int(time.time())}-{uuid.uuid4().hex[:8]}"
    os.environ["S4L_REDDIT_PROJECT"] = project_name
    os.environ["S4L_REDDIT_BATCH_ID"] = plan_batch_id

    # Dump-dir discover (introduced by the 2026-05-07 opaque-results
    # refactor): create a private dump dir and tell reddit_tools.py via env
    # var to write thread JSON there instead of stdout. Once the searches
    # have run, every dumped file is harvested directly into the candidate
    # plan.
    import tempfile as _tempfile
    import shutil as _shutil
    dump_dir = _tempfile.mkdtemp(prefix=f"reddit-discover-{project_name}-")
    try:
        os.environ["S4L_REDDIT_DUMP_DIR"] = dump_dir
        print(f"[post_reddit] Starting programmatic discover "
              f"(queries={len(queries)}, limit={args.limit}, dump_dir={dump_dir})")
        start = time.time()
        try:
            searches_ok = _discover_run_searches(queries, int(args.limit or 25))
        finally:
            # Always unset so a subsequent (non-discover) reddit_tools call in
            # this process doesn't accidentally inherit dump mode. This also
            # covers a failing reddit_tools import inside the helper.
            os.environ.pop("S4L_REDDIT_DUMP_DIR", None)
        elapsed = time.time() - start
        print(f"[post_reddit] Discover ran {searches_ok}/{len(queries)} searches "
              f"in {elapsed:.0f}s ($0.0000)")

        # Harvest the dump dir: every cmd_search call that returned threads
        # wrote a result-*.json. Even if a later query halted the loop,
        # earlier searches' dumps are still valid candidates.
        candidates = _discover_harvest_dump(dump_dir)
    finally:
        # Remove the temp dir on every exit path, including exceptions.
        _shutil.rmtree(dump_dir, ignore_errors=True)

    # Zero successful searches AND nothing harvested = real search-layer
    # failure (rate-limit / all queries 500'd). Return an error so the runner
    # counts it failed (rc 5). If searches ran but simply found no fresh
    # threads, candidates is empty WITHOUT an error → rc 6 (skipped).
    if searches_ok == 0 and not candidates:
        print(f"[post_reddit] Discover FAILED: 0/{len(queries)} searches succeeded, "
              f"no candidates harvested")
        return {"project_name": project_name, "decisions": [], "cost": 0.0,
                "error": "no_search_results"}

    print(f"[post_reddit] Discover harvested {len(candidates)} candidate(s) from dump dir")
    if not candidates:
        print(f"[post_reddit] No candidates dumped — {searches_ok}/{len(queries)} "
              f"searches ran but returned no fresh threads")

    # --- Topical-overlap scoring + top-N cap (replaces the old ripen momentum
    # gate, retired 2026-06-01 to align with the Twitter pipeline which dropped
    # its inter-phase momentum sleep on 2026-05-31). Reddit's sort=relevance
    # leaks high-engagement OFF-topic threads that share only structural English
    # words with a chatty natural-language query (e.g. an on-topic query about a
    # habit-tracking tool matching an unrelated BORU drama thread). Without the
    # ripen stage thinning the set over 30 min, we instead sort by a topical-
    # overlap signal and keep the top N so draft spends its budget on the most
    # on-topic + active threads. We do NOT hard-drop low-overlap rows: every
    # harvested candidate is still persisted to the queue for analytics + salvage;
    # the cap is a soft prioritization only (conservative per user directive —
    # isolate + surface the garbage in logs rather than silently filtering it).
    DISCOVER_CAP = int(os.environ.get("S4L_REDDIT_DISCOVER_CAP", "25") or "25")
    for c in candidates:
        ov = _topical_overlap(c.get("search_topic"), c.get("thread_title"), c.get("selftext"))
        # velocity proxy: comments weighted 4x upvotes, echoing the old ripen
        # composite (Δup + 4·Δcomments) but on absolute counts since we no longer
        # sample momentum over a time window.
        vel = int(c.get("score") or 0) + 4 * int(c.get("num_comments") or 0)
        c["topical_overlap"] = round(ov, 3)
        c["velocity"] = vel
    # Primary sort: overlap desc (on-topic first). Tiebreak: velocity desc.
    ranked = sorted(candidates, key=lambda c: (c["topical_overlap"], c["velocity"]), reverse=True)
    # A non-positive cap disables capping.
    selected = ranked[:DISCOVER_CAP] if DISCOVER_CAP > 0 else ranked

    # [discover_harvest] marker: surface the overlap distribution so relevance-sort
    # garbage is visible in logs. overlap_zero = rows sharing NO content token with
    # the query = almost certainly leak; if these dominate the harvest we know the
    # query/search is misfiring without having dropped anything.
    n_zero = sum(1 for c in candidates if c["topical_overlap"] == 0.0)
    n_low = sum(1 for c in candidates if 0.0 < c["topical_overlap"] < 0.34)
    n_mid = sum(1 for c in candidates if 0.34 <= c["topical_overlap"] < 0.67)
    n_high = sum(1 for c in candidates if c["topical_overlap"] >= 0.67)
    cut = selected[-1]["topical_overlap"] if selected else 0.0
    print(f"[discover_harvest] project={project_name} harvested={len(candidates)} "
          f"selected={len(selected)} cap={DISCOVER_CAP} cutoff_overlap={cut:.3f} "
          f"overlap_zero={n_zero} low={n_low} mid={n_mid} high={n_high}")
    for c in selected:
        print(f"[discover_harvest] ov={c['topical_overlap']:.2f} vel={c['velocity']:>5} "
              f"q={(c.get('search_topic') or '')[:40]!r} :: {(c.get('thread_title') or '')[:70]!r}")

    # Persist freshly-discovered candidates to reddit_candidates so a
    # transient post failure on a later phase can be retried by the next
    # cycle's Phase 0 salvage. Best-effort: if the queue write fails, the
    # tmpfile flow still works for this cycle, we just lose the salvage
    # benefit. See module-level _db_upsert_discovered_candidate. We persist
    # ALL harvested candidates (not just the capped `selected`) so the queue
    # keeps full history per the no-pruning rule.
    queue_batch = getattr(args, "batch_id", None) or plan_batch_id
    if not args.dry_run and candidates:
        for c in candidates:
            _db_upsert_discovered_candidate(c, queue_batch, project_name)

    # Backfill seed on reddit_search_attempts rows from this batch so the
    # Search Queries dashboard can join attempts → posts via search_topic.
    # Use the top-ranked selected candidate's search_topic so the seed reflects
    # what actually flows into draft.
    if selected and plan_batch_id:
        seed = (selected[0].get("search_topic") or "").strip()
        if seed:
            try:
                api_patch(
                    "/api/v1/reddit-search-attempts",
                    {"batch_id": plan_batch_id, "seed": seed},
                )
            except Exception as e:
                print(f"[post_reddit] WARNING: seed backfill failed: {e}", file=sys.stderr)

    return {"project_name": project_name, "decisions": selected,
            "cost": 0.0, "session_id": None,
            "phase": "discover"}
|
|
2079
|
+
|
|
2080
|
+
|
|
2081
|
+
def _draft_iteration(plan, config, reddit_username):
|
|
2082
|
+
"""DRAFT phase: write comments for ripen-survivors only.
|
|
2083
|
+
|
|
2084
|
+
`plan` is the ripen-filtered discover output. Each decision has thread_url
|
|
2085
|
+
+ ripen annotations. Claude fetches each thread and writes the comment.
|
|
2086
|
+
Returns the plan with `text` added to each decision (i.e. ready for _post_iteration).
|
|
2087
|
+
|
|
2088
|
+
Salvage shortcut (2026-05-06): for each candidate we first check if a
|
|
2089
|
+
still-fresh draft exists in reddit_candidates (drafted < DRAFT_TTL_MIN min
|
|
2090
|
+
ago, written by a prior cycle whose post phase failed transiently). If
|
|
2091
|
+
every candidate has a fresh draft, we skip the Claude session entirely
|
|
2092
|
+
and merge the persisted text in. Mirrors twitter_post_plan.py's "EXISTING
|
|
2093
|
+
DRAFT" reuse path; saves $0.20-$0.40 per salvaged candidate.
|
|
2094
|
+
"""
|
|
2095
|
+
project_name = plan.get("project_name", "general")
|
|
2096
|
+
candidates = [d for d in (plan.get("decisions") or []) if d.get("thread_url")]
|
|
2097
|
+
if not candidates:
|
|
2098
|
+
return plan
|
|
2099
|
+
|
|
2100
|
+
# Salvage shortcut: check each candidate for a still-fresh persisted draft
|
|
2101
|
+
# before paying the LLM cost. If ALL candidates are covered, skip Claude
|
|
2102
|
+
# and return the merged plan immediately. Order matters here: we must
|
|
2103
|
+
# consult the DB before building the Claude prompt so we don't waste
|
|
2104
|
+
# tokens prepping a session we won't run.
|
|
2105
|
+
fresh_drafts = {}
|
|
2106
|
+
for c in candidates:
|
|
2107
|
+
# An in-memory draft_text from _db_pick_salvage_candidate also counts.
|
|
2108
|
+
if c.get("draft_text"):
|
|
2109
|
+
fresh_drafts[c["thread_url"]] = (
|
|
2110
|
+
c["draft_text"],
|
|
2111
|
+
c.get("engagement_style") or "reused",
|
|
2112
|
+
)
|
|
2113
|
+
continue
|
|
2114
|
+
text, style = _db_load_fresh_draft(c["thread_url"])
|
|
2115
|
+
if text:
|
|
2116
|
+
fresh_drafts[c["thread_url"]] = (text, style or c.get("engagement_style") or "reused")
|
|
2117
|
+
|
|
2118
|
+
if fresh_drafts and len(fresh_drafts) == len(candidates):
|
|
2119
|
+
print(f"[post_reddit] Draft shortcut: all {len(candidates)} candidate(s) "
|
|
2120
|
+
f"have fresh drafts (<{DRAFT_TTL_MIN}m), skipping Claude session.")
|
|
2121
|
+
merged = []
|
|
2122
|
+
for c in candidates:
|
|
2123
|
+
text, style = fresh_drafts[c["thread_url"]]
|
|
2124
|
+
merged_d = dict(c)
|
|
2125
|
+
merged_d["text"] = text
|
|
2126
|
+
merged_d["engagement_style"] = style
|
|
2127
|
+
merged_d["action"] = "post"
|
|
2128
|
+
merged_d.setdefault("reply_to_url", None)
|
|
2129
|
+
merged.append(merged_d)
|
|
2130
|
+
plan = dict(plan)
|
|
2131
|
+
plan["decisions"] = merged
|
|
2132
|
+
plan["draft_cost"] = 0.0
|
|
2133
|
+
plan["phase"] = "draft"
|
|
2134
|
+
plan["draft_reused"] = True
|
|
2135
|
+
# Build a "reused draft" marker trace so the audit row isn't empty.
|
|
2136
|
+
# We can't recover the exact context the prior cycle's Claude saw,
|
|
2137
|
+
# but the current top_performers/recent_comments document what the
|
|
2138
|
+
# few-shot prompt WOULD have contained had we redrafted. The
|
|
2139
|
+
# reused_from_prior_cycle flag tells future auditors "this is
|
|
2140
|
+
# current-cycle context, not what produced the draft" — without it
|
|
2141
|
+
# the trace would look like Claude saw this report and chose to
|
|
2142
|
+
# reuse, which it didn't (Claude wasn't invoked at all). Marker
|
|
2143
|
+
# also gives 100% trace coverage on the platform so SQL queries
|
|
2144
|
+
# don't have to special-case NULL rows.
|
|
2145
|
+
try:
|
|
2146
|
+
from generation_trace import build_trace, write_trace_tempfile
|
|
2147
|
+
top_report = get_top_performers(project_name)
|
|
2148
|
+
recent_comments = get_recent_comments()
|
|
2149
|
+
trace = build_trace(
|
|
2150
|
+
platform="reddit",
|
|
2151
|
+
project_name=project_name,
|
|
2152
|
+
prompt_chars=0, # no Claude call this cycle
|
|
2153
|
+
top_performers_text=top_report or "",
|
|
2154
|
+
top_search_topics_text="",
|
|
2155
|
+
recent_comment_ids=[pid for pid, _ in (recent_comments or [])],
|
|
2156
|
+
model="reused_from_prior_cycle",
|
|
2157
|
+
min_score_floor=10,
|
|
2158
|
+
extras={
|
|
2159
|
+
"reused_from_prior_cycle": True,
|
|
2160
|
+
"draft_ttl_min": DRAFT_TTL_MIN,
|
|
2161
|
+
"reused_candidate_count": len(candidates),
|
|
2162
|
+
},
|
|
2163
|
+
)
|
|
2164
|
+
trace_path = write_trace_tempfile(trace, prefix="reddit_reused_trace_")
|
|
2165
|
+
if trace_path:
|
|
2166
|
+
plan["generation_trace_path"] = trace_path
|
|
2167
|
+
print(f"[post_reddit] Reused-draft trace marker: {trace_path}")
|
|
2168
|
+
except Exception as e:
|
|
2169
|
+
print(f"[post_reddit] WARNING: reused-draft trace build failed "
|
|
2170
|
+
f"({e}); proceeding without trace")
|
|
2171
|
+
return plan
|
|
2172
|
+
|
|
2173
|
+
project = None
|
|
2174
|
+
config_projects = config.get("projects", [])
|
|
2175
|
+
for p in config_projects:
|
|
2176
|
+
if p["name"].lower() == project_name.lower():
|
|
2177
|
+
project = p
|
|
2178
|
+
break
|
|
2179
|
+
if not project:
|
|
2180
|
+
print(f"[post_reddit] WARNING: project '{project_name}' not found in config, drafting with generic context")
|
|
2181
|
+
project = {"name": project_name}
|
|
2182
|
+
|
|
2183
|
+
# 2026-05-19: pick the engagement style HERE — draft is the only
|
|
2184
|
+
# Claude call in the Reddit cycle that actually writes a comment, so
|
|
2185
|
+
# this is where the picker belongs. (Discover is scan-only opaque
|
|
2186
|
+
# mode; it never sees thread content and never drafts text, so a
|
|
2187
|
+
# picker there would just be useless decoration.)
|
|
2188
|
+
# Mirrors the Twitter engage cycle: pick once → filter top_performers
|
|
2189
|
+
# to the assigned style → embed the assignment block in the prompt →
|
|
2190
|
+
# JSON example shows the literal assigned style name. End-to-end
|
|
2191
|
+
# adherence comes from those three lined-up signals.
|
|
2192
|
+
style_assignment = pick_style_for_post("reddit", context="posting")
|
|
2193
|
+
picked_style = style_assignment.get("style")
|
|
2194
|
+
print(f"[post_reddit] draft style assigned: mode={style_assignment['mode']} "
|
|
2195
|
+
f"style={picked_style or '(invent)'}")
|
|
2196
|
+
top_report = get_top_performers(project_name, style=picked_style)
|
|
2197
|
+
recent_comments = get_recent_comments()
|
|
2198
|
+
# We don't have a Reddit equivalent of top_search_topics_report in
|
|
2199
|
+
# the draft phase (the discover phase loads it for the search step).
|
|
2200
|
+
# Pass empty string; the trace audit still captures top_performers
|
|
2201
|
+
# and recent_comments, which is the bulk of the few-shot context.
|
|
2202
|
+
prompt = build_draft_prompt(project, config, candidates, top_report, recent_comments,
|
|
2203
|
+
style_assignment=style_assignment)
|
|
2204
|
+
|
|
2205
|
+
# Build the generation_trace audit blob: what Claude is about to see.
|
|
2206
|
+
# Captured BEFORE the Claude call so we never end up with a post row
|
|
2207
|
+
# missing its trace if Claude errors out. The path is stashed in
|
|
2208
|
+
# `plan` so the post-phase (_post_iteration → log_post) can forward
|
|
2209
|
+
# it to reddit_tools.py for INSERT into posts.generation_trace.
|
|
2210
|
+
# Same trace reused for every post produced from this draft session.
|
|
2211
|
+
try:
|
|
2212
|
+
from generation_trace import build_trace, write_trace_tempfile
|
|
2213
|
+
trace = build_trace(
|
|
2214
|
+
platform="reddit",
|
|
2215
|
+
project_name=project_name,
|
|
2216
|
+
prompt_chars=len(prompt or ""),
|
|
2217
|
+
top_performers_text=top_report or "",
|
|
2218
|
+
top_search_topics_text="", # Reddit draft phase doesn't surface this
|
|
2219
|
+
recent_comment_ids=[pid for pid, _ in (recent_comments or [])],
|
|
2220
|
+
model=None,
|
|
2221
|
+
min_score_floor=10, # PLATFORM_MIN_SCORE['reddit']
|
|
2222
|
+
)
|
|
2223
|
+
trace_path = write_trace_tempfile(trace, prefix="reddit_gen_trace_")
|
|
2224
|
+
if trace_path:
|
|
2225
|
+
plan["generation_trace_path"] = trace_path
|
|
2226
|
+
print(f"[post_reddit] Generation trace: {trace_path} "
|
|
2227
|
+
f"({os.path.getsize(trace_path)} bytes)")
|
|
2228
|
+
except Exception as e:
|
|
2229
|
+
# Audit row is nice-to-have, never a blocker.
|
|
2230
|
+
print(f"[post_reddit] WARNING: generation_trace build failed "
|
|
2231
|
+
f"({e}); proceeding without trace")
|
|
2232
|
+
|
|
2233
|
+
print(f"[post_reddit] Starting draft session for {len(candidates)} thread(s)...")
|
|
2234
|
+
start = time.time()
|
|
2235
|
+
ok, output, usage = run_claude(prompt, timeout=600)
|
|
2236
|
+
elapsed = time.time() - start
|
|
2237
|
+
print(f"[post_reddit] Draft finished in {elapsed:.0f}s (${usage['cost_usd']:.4f})")
|
|
2238
|
+
|
|
2239
|
+
if not ok:
|
|
2240
|
+
print(f"[post_reddit] Draft FAILED: {output[:300]}")
|
|
2241
|
+
plan["draft_error"] = "claude_failed"
|
|
2242
|
+
plan["draft_cost"] = usage["cost_usd"]
|
|
2243
|
+
return plan
|
|
2244
|
+
|
|
2245
|
+
drafted = parse_post_decisions(output)
|
|
2246
|
+
print(f"[post_reddit] Draft produced {len(drafted)} post(s)")
|
|
2247
|
+
|
|
2248
|
+
# 2026-05-11: parse optional action=reject lines and forward any
|
|
2249
|
+
# `proposed_excludes` arrays into project_search_excludes via the
|
|
2250
|
+
# activation gate (>=2 distinct batches required before a term goes
|
|
2251
|
+
# live). Self-improving denylist mirroring twitter's behavior. Errors
|
|
2252
|
+
# here MUST NOT kill the draft phase; the post pipeline is the critical
|
|
2253
|
+
# path. See parse_reject_decisions / _propose_excludes_from_rejects.
|
|
2254
|
+
try:
|
|
2255
|
+
rejects = parse_reject_decisions(output)
|
|
2256
|
+
if rejects:
|
|
2257
|
+
cand_by_url = {c.get("thread_url"): c for c in candidates if c.get("thread_url")}
|
|
2258
|
+
counters = _propose_excludes_from_rejects(
|
|
2259
|
+
rejects, project_name, plan.get("batch_id"), cand_by_url,
|
|
2260
|
+
)
|
|
2261
|
+
if counters["proposed"]:
|
|
2262
|
+
print(
|
|
2263
|
+
f"[post_reddit] reject lines={counters['rejects_seen']} "
|
|
2264
|
+
f"proposed={counters['proposed']} inserted={counters['inserted']} "
|
|
2265
|
+
f"bumped={counters['bumped']} rejected={counters['rejected']} "
|
|
2266
|
+
f"active_now={counters['active_now']}"
|
|
2267
|
+
)
|
|
2268
|
+
except Exception as e:
|
|
2269
|
+
print(f"[post_reddit] WARN: reject-line processing failed: {e}", file=sys.stderr)
|
|
2270
|
+
|
|
2271
|
+
# Merge text back into the original candidates by thread_url so we
|
|
2272
|
+
# preserve ripen annotations, search_topic, etc. from discover phase.
|
|
2273
|
+
# Each freshly-written draft is also persisted to reddit_candidates so a
|
|
2274
|
+
# later salvage iteration can reuse it without paying the LLM cost again.
|
|
2275
|
+
by_url = {d["thread_url"]: d for d in drafted}
|
|
2276
|
+
merged = []
|
|
2277
|
+
for c in candidates:
|
|
2278
|
+
url = c.get("thread_url", "")
|
|
2279
|
+
drafted_d = by_url.get(url)
|
|
2280
|
+
if drafted_d and drafted_d.get("text"):
|
|
2281
|
+
merged_d = dict(c)
|
|
2282
|
+
merged_d["text"] = drafted_d["text"]
|
|
2283
|
+
merged_d["reply_to_url"] = drafted_d.get("reply_to_url")
|
|
2284
|
+
merged_d["thread_author"] = drafted_d.get("thread_author") or c.get("thread_author")
|
|
2285
|
+
merged_d["thread_title"] = drafted_d.get("thread_title") or c.get("thread_title")
|
|
2286
|
+
merged_d["engagement_style"] = drafted_d.get("engagement_style") or c.get("engagement_style")
|
|
2287
|
+
merged_d["action"] = "post"
|
|
2288
|
+
merged.append(merged_d)
|
|
2289
|
+
_db_save_draft(url, merged_d["text"], merged_d.get("engagement_style"))
|
|
2290
|
+
else:
|
|
2291
|
+
# Claude OMITted this thread (build_draft_prompt's SELECTION GATE
|
|
2292
|
+
# decided no plausible bridge between the thread's audience and
|
|
2293
|
+
# the project — token-overlap false positive, off-topic sub, etc.).
|
|
2294
|
+
# Mark status='failed' with reason='draft_gate_omit' so Phase 0
|
|
2295
|
+
# salvage on the next cycle stops re-pulling it. Without this the
|
|
2296
|
+
# same dead thread would keep clearing ripen (engagement is real)
|
|
2297
|
+
# and burning ~$0.05/cycle on a fetch + gate decision that always
|
|
2298
|
+
# lands the same way. Mirrors the one-strike rule at ripen time,
|
|
2299
|
+
# applied at draft time for active-but-unfit threads.
|
|
2300
|
+
print(f"[post_reddit] Draft gate OMIT for {url}: marking status=failed")
|
|
2301
|
+
_db_mark_candidate_attempt(url, reason="draft_gate_omit", permanent=True)
|
|
2302
|
+
|
|
2303
|
+
plan = dict(plan)
|
|
2304
|
+
plan["decisions"] = merged
|
|
2305
|
+
plan["draft_cost"] = usage["cost_usd"]
|
|
2306
|
+
plan["draft_session_id"] = usage.get("session_id")
|
|
2307
|
+
plan["phase"] = "draft"
|
|
2308
|
+
# Stash the picker assignment so _post_iteration (which runs in a
|
|
2309
|
+
# separate process via JSON-serialized plan) can pass it to
|
|
2310
|
+
# validate_or_register for USE-mode drift coercion + INVENT-mode gating.
|
|
2311
|
+
plan["style_assignment"] = style_assignment
|
|
2312
|
+
return plan
|
|
2313
|
+
|
|
2314
|
+
|
|
2315
|
+
def _post_iteration(plan, reddit_username):
|
|
2316
|
+
"""Execute browser CDP posts for the decisions in plan. Returns (posted, failed)."""
|
|
2317
|
+
project_name = plan["project_name"]
|
|
2318
|
+
decisions = plan.get("decisions") or []
|
|
2319
|
+
# Picker assignment was stamped by _draft_iteration; survives JSON
|
|
2320
|
+
# serialization across the draft→post process boundary. Used below
|
|
2321
|
+
# in validate_or_register for USE-mode drift coercion + INVENT-mode
|
|
2322
|
+
# gating. Fallback to {} for plans drafted before this field landed.
|
|
2323
|
+
style_assignment = plan.get("style_assignment") or {}
|
|
2324
|
+
|
|
2325
|
+
if not decisions:
|
|
2326
|
+
return 0, 0
|
|
2327
|
+
|
|
2328
|
+
# 2026-05-08: post-phase cap REMOVED per user instruction. Three serial
|
|
2329
|
+
# gates already filter the candidate pool (search-time blocks,
|
|
2330
|
+
# ripen composite floor, softened LLM relevance gate). Anything that
|
|
2331
|
+
# survives all three has earned its post; an arbitrary 10/cycle cap was
|
|
2332
|
+
# just throwing away qualified work. If Reddit rate-limits start firing
|
|
2333
|
+
# under runaway-cycle conditions, revisit by adding a per-minute throttle
|
|
2334
|
+
# to _post_iteration's loop body, NOT a hard count cap.
|
|
2335
|
+
|
|
2336
|
+
# In two-phase mode (plan in process A, post in process B), the env var
|
|
2337
|
+
# set by run_claude in process A is gone. Re-export here so log_post →
|
|
2338
|
+
# reddit_tools.py log-post stamps posts.claude_session_id correctly and
|
|
2339
|
+
# the dashboard activity feed can join to claude_sessions for cost.
|
|
2340
|
+
plan_session_id = plan.get("session_id")
|
|
2341
|
+
if plan_session_id:
|
|
2342
|
+
os.environ["CLAUDE_SESSION_ID"] = plan_session_id
|
|
2343
|
+
|
|
2344
|
+
active_campaigns = load_active_reddit_campaigns()
|
|
2345
|
+
if active_campaigns:
|
|
2346
|
+
for c in active_campaigns:
|
|
2347
|
+
print(f"[post_reddit] active campaign id={c['id']} "
|
|
2348
|
+
f"sample_rate={c['sample_rate']:.3f} suffix={c['suffix']!r}")
|
|
2349
|
+
|
|
2350
|
+
posted = 0
|
|
2351
|
+
failed = 0
|
|
2352
|
+
|
|
2353
|
+
for i, decision in enumerate(decisions):
|
|
2354
|
+
thread_url = decision["thread_url"]
|
|
2355
|
+
reply_to_url = decision.get("reply_to_url")
|
|
2356
|
+
text = decision["text"]
|
|
2357
|
+
thread_author = decision.get("thread_author", "unknown")
|
|
2358
|
+
thread_title = decision.get("thread_title", "unknown")
|
|
2359
|
+
# validate_or_register: in USE mode, coerces any drifted style name
|
|
2360
|
+
# back to the assigned one (so picker authority is preserved even if
|
|
2361
|
+
# the drafter ignores the assignment). In INVENT mode (5% slot),
|
|
2362
|
+
# registers the new style into engagement_styles_registry via
|
|
2363
|
+
# /api/v1/engagement-styles/registry. assigned_style/assigned_mode
|
|
2364
|
+
# come from pick_style_for_post() above; without them the picker's
|
|
2365
|
+
# choice would be silently overridable by the model.
|
|
2366
|
+
engagement_style, _style_action = validate_or_register(
|
|
2367
|
+
decision,
|
|
2368
|
+
source_post={
|
|
2369
|
+
"platform": "reddit",
|
|
2370
|
+
"post_url": thread_url,
|
|
2371
|
+
"post_id": None,
|
|
2372
|
+
"model": decision.get("model"),
|
|
2373
|
+
},
|
|
2374
|
+
assigned_style=(style_assignment or {}).get("style"),
|
|
2375
|
+
assigned_mode=(style_assignment or {}).get("mode"),
|
|
2376
|
+
)
|
|
2377
|
+
search_topic = decision.get("search_topic") or None
|
|
2378
|
+
|
|
2379
|
+
applied_campaign_ids = []
|
|
2380
|
+
for camp in active_campaigns:
|
|
2381
|
+
if random.random() < camp["sample_rate"]:
|
|
2382
|
+
text = text + camp["suffix"]
|
|
2383
|
+
applied_campaign_ids.append(camp["id"])
|
|
2384
|
+
if applied_campaign_ids:
|
|
2385
|
+
print(f"[post_reddit] applied campaigns {applied_campaign_ids} (suffix appended)")
|
|
2386
|
+
|
|
2387
|
+
# Audience-page detection (2026-05-17). Inspect the unwrapped text for
|
|
2388
|
+
# any URL that exactly matches a curated audience-page (e.g.
|
|
2389
|
+
# https://s4l.ai/ghostwriting). When found, posts.link_source is
|
|
2390
|
+
# stamped 'audience_page:<angle>' for the row so the dashboard can
|
|
2391
|
+
# break out curated traffic from generic homepage links. Detection
|
|
2392
|
+
# runs BEFORE wrap_text_for_post because wrapping rewrites the URLs
|
|
2393
|
+
# to /r/<code> short links; classify_url_as_audience_page() needs
|
|
2394
|
+
# the original target URL.
|
|
2395
|
+
audience_page_link_source = None
|
|
2396
|
+
try:
|
|
2397
|
+
for _url_m in re.finditer(r'https?://[^\s)\]>"\']+', text):
|
|
2398
|
+
_raw = _url_m.group(0).rstrip('.,);!?]')
|
|
2399
|
+
_angle = _audience_classify_url(_raw, project_name)
|
|
2400
|
+
if _angle:
|
|
2401
|
+
audience_page_link_source = f"audience_page:{_angle}"
|
|
2402
|
+
break
|
|
2403
|
+
except Exception as _e:
|
|
2404
|
+
print(f"[post_reddit] WARNING: audience-page classify raised ({_e})")
|
|
2405
|
+
|
|
2406
|
+
# URL-wrap the final text (URLs in suffix included). Mints into
|
|
2407
|
+
# post_links with NULL post_id; we backfill after log_post returns
|
|
2408
|
+
# below. On wrap failure, post unwrapped — losing attribution is
|
|
2409
|
+
# better than failing a post that already passed planning.
|
|
2410
|
+
minted_session = None
|
|
2411
|
+
try:
|
|
2412
|
+
from dm_short_links import wrap_text_for_post, utm_only_text
|
|
2413
|
+
wrap_res = wrap_text_for_post(text=text, platform="reddit",
|
|
2414
|
+
project_name=project_name)
|
|
2415
|
+
if wrap_res.get("ok"):
|
|
2416
|
+
text = wrap_res["text"]
|
|
2417
|
+
minted_session = wrap_res.get("minted_session")
|
|
2418
|
+
if wrap_res.get("codes"):
|
|
2419
|
+
print(f"[post_reddit] wrapped {len(wrap_res['codes'])} URL(s): "
|
|
2420
|
+
f"{wrap_res['codes']}")
|
|
2421
|
+
else:
|
|
2422
|
+
print(f"[post_reddit] WARNING: URL wrap failed "
|
|
2423
|
+
f"({wrap_res.get('error')}); falling back to UTM-only")
|
|
2424
|
+
text = utm_only_text(text=text, platform="reddit", project_name=project_name)
|
|
2425
|
+
except Exception as e:
|
|
2426
|
+
print(f"[post_reddit] WARNING: URL wrap raised ({e}); falling back to UTM-only")
|
|
2427
|
+
try:
|
|
2428
|
+
from dm_short_links import utm_only_text
|
|
2429
|
+
text = utm_only_text(text=text, platform="reddit", project_name=project_name)
|
|
2430
|
+
except Exception as ee:
|
|
2431
|
+
print(f"[post_reddit] WARNING: UTM-only fallback also failed ({ee}); posting unwrapped")
|
|
2432
|
+
|
|
2433
|
+
# Per-row reddit-browser lease (2026-05-13). Acquire JUST around the
|
|
2434
|
+
# CDP work, release before this row's DB post-processing and the 3-min
|
|
2435
|
+
# between-post sleep. Peers (link-edit, dm-outreach, engage,
|
|
2436
|
+
# engage-dm-replies) can use the browser during our sleeps and DB
|
|
2437
|
+
# writes instead of sitting blocked until the whole batch finishes.
|
|
2438
|
+
lease_ok, lease_msg = _acquire_browser_lease(timeout=600, ttl=90)
|
|
2439
|
+
if not lease_ok:
|
|
2440
|
+
print(f"[post_reddit] {i + 1}/{len(decisions)} LEASE: {lease_msg}; skipping post")
|
|
2441
|
+
failed += 1
|
|
2442
|
+
# Treat lease-acquire failure as TRANSIENT so phase0 salvages
|
|
2443
|
+
# the row next cycle (it's not the candidate's fault that a
|
|
2444
|
+
# peer pipeline held the browser too long).
|
|
2445
|
+
_db_mark_candidate_attempt(thread_url, "lease_acquire_timeout", permanent=False)
|
|
2446
|
+
if i < len(decisions) - 1:
|
|
2447
|
+
time.sleep(180)
|
|
2448
|
+
continue
|
|
2449
|
+
|
|
2450
|
+
try:
|
|
2451
|
+
print(f"[post_reddit] Posting {i + 1}/{len(decisions)}: {thread_title[:50]}...")
|
|
2452
|
+
result = post_via_cdp(thread_url, reply_to_url, text)
|
|
2453
|
+
finally:
|
|
2454
|
+
_release_browser_lease()
|
|
2455
|
+
|
|
2456
|
+
if result.get("ok"):
|
|
2457
|
+
if result.get("already_replied"):
|
|
2458
|
+
print(f"[post_reddit] DEDUP: already posted in this thread")
|
|
2459
|
+
# Treat dedup as a successful queue resolution: the row should
|
|
2460
|
+
# come out of 'pending' so Phase 0 stops salvaging it.
|
|
2461
|
+
_db_mark_candidate_posted(thread_url, None)
|
|
2462
|
+
continue
|
|
2463
|
+
permalink = result.get("permalink", "")
|
|
2464
|
+
if not permalink or not permalink.startswith("http"):
|
|
2465
|
+
print(f"[post_reddit] SKIPPED LOG: no valid permalink captured (got: {permalink!r})")
|
|
2466
|
+
failed += 1
|
|
2467
|
+
# No-permalink is permanent: the post may have actually
|
|
2468
|
+
# landed but we can't verify it; retrying would dupe.
|
|
2469
|
+
_db_mark_candidate_attempt(thread_url, "no_permalink", permanent=True)
|
|
2470
|
+
continue
|
|
2471
|
+
new_post_id = log_post(thread_url, permalink, text, project_name,
|
|
2472
|
+
thread_author, thread_title, reddit_username,
|
|
2473
|
+
engagement_style=engagement_style,
|
|
2474
|
+
search_topic=search_topic,
|
|
2475
|
+
# Forward the trace blob built during draft phase.
|
|
2476
|
+
# Same trace for every post in this plan because they
|
|
2477
|
+
# all saw the same few-shot context. None when the
|
|
2478
|
+
# draft phase used a reused/cached draft (no Claude
|
|
2479
|
+
# call) — that's fine, audit just records no trace.
|
|
2480
|
+
generation_trace_path=plan.get("generation_trace_path"),
|
|
2481
|
+
link_source=audience_page_link_source)
|
|
2482
|
+
bump_campaigns("posts", new_post_id, applied_campaign_ids)
|
|
2483
|
+
# Backfill post_links.post_id for the codes minted at wrap time
|
|
2484
|
+
# so /api/short-links/<code> resolver knows which post each
|
|
2485
|
+
# click attributes to. Idempotent; no-op when minted_session is
|
|
2486
|
+
# None (post had no URLs).
|
|
2487
|
+
if minted_session and new_post_id:
|
|
2488
|
+
try:
|
|
2489
|
+
from dm_short_links import backfill_post_id
|
|
2490
|
+
backfill_post_id(minted_session=minted_session,
|
|
2491
|
+
post_id=new_post_id)
|
|
2492
|
+
except Exception as e:
|
|
2493
|
+
print(f"[post_reddit] WARNING: backfill_post_id failed ({e})")
|
|
2494
|
+
posted += 1
|
|
2495
|
+
print(f"[post_reddit] POSTED: {permalink}")
|
|
2496
|
+
_db_mark_candidate_posted(thread_url, new_post_id)
|
|
2497
|
+
else:
|
|
2498
|
+
err = result.get("error", "unknown")
|
|
2499
|
+
failed += 1
|
|
2500
|
+
print(f"[post_reddit] CDP FAILED: {err}")
|
|
2501
|
+
if err == "account_blocked_in_sub":
|
|
2502
|
+
# project=None: account-level ban applies across ALL projects,
|
|
2503
|
+
# not just the one currently posting. Backfill of 28 existing
|
|
2504
|
+
# project-scoped entries applied 2026-05-19.
|
|
2505
|
+
mark_comment_blocked(thread_url, reason=err, project=None)
|
|
2506
|
+
# Classify the CDP error for queue retry. Unknown errors default
|
|
2507
|
+
# to TRANSIENT so we don't permanently kill candidates on a new
|
|
2508
|
+
# error string we haven't classified yet; the MAX_ATTEMPTS cap
|
|
2509
|
+
# auto-promotes them to 'failed' after 3 retries anyway.
|
|
2510
|
+
permanent = err in _PERMANENT_CDP_ERRORS
|
|
2511
|
+
_db_mark_candidate_attempt(thread_url, err, permanent=permanent)
|
|
2512
|
+
|
|
2513
|
+
if i < len(decisions) - 1:
|
|
2514
|
+
time.sleep(180) # 3 min gap between posts within a single Claude session
|
|
2515
|
+
|
|
2516
|
+
return posted, failed
|
|
2517
|
+
|
|
2518
|
+
|
|
2519
|
+
def main():
    """CLI entry point: parse arguments and run exactly one pipeline phase.

    `--phase` is required and selects the work:

    * phase0   -- `_db_phase0_salvage(batch_id)`; prints `expired=N salvaged=M`.
    * salvage  -- writes salvage-eligible rows to `--out` as a plan JSON.
    * discover -- runs `_discover_iteration` and writes the plan to `--out`.
    * draft    -- reads `--in`, runs `_draft_iteration`, writes `--out`.
    * post     -- reads `--in` and posts via `_post_iteration`.

    Exit codes used here: 2 = missing/invalid arguments, 3 = rate-limited
    (discover) or Playwright not importable (post), 4 = discover returned no
    plan, 5 = plan carries an error, 6 = nothing to do (no decisions / no
    salvageable rows).
    """
    cli = argparse.ArgumentParser(description="Reddit posting orchestrator")
    cli.add_argument("--dry-run", action="store_true", help="Print prompt without executing")
    cli.add_argument("--limit", type=int, default=3, help="Max comments per Claude session (default: 3)")
    cli.add_argument("--timeout", type=int, default=3600, help="Timeout for Claude session")
    cli.add_argument("--project", default=None, help="Override project selection")
    # NOTE(review): the help text below says salvage pulls ONE row, but the
    # salvage branch requests up to --limit rows -- confirm which is intended.
    cli.add_argument("--phase",
                     choices=["discover", "draft", "post", "phase0", "salvage"],
                     required=True,
                     help="discover: search+select threads only (no drafting), writes JSON to --out. "
                          "draft: write comments for ripen-survivors from --in, writes JSON to --out. "
                          "post: read JSON from --in and post via CDP. "
                          "phase0: hard-expire stale pending rows + re-assign salvageable rows "
                          "to --batch-id. Prints `expired=N salvaged=M` for the orchestrator. "
                          "salvage: pull ONE salvage-eligible row (already re-assigned to "
                          "--batch-id by phase0) and write it as a discover-shape JSON to --out. "
                          "Exits 0 with a candidate, 6 if nothing salvageable.")
    cli.add_argument("--out", default=None,
                     help="Output JSON path (--phase discover, --phase draft, --phase salvage)")
    cli.add_argument("--in", dest="in_path", default=None,
                     help="Input JSON path (--phase draft, --phase post)")
    cli.add_argument("--exclude", default="", help="Comma-separated project names to exclude")
    cli.add_argument("--batch-id", dest="batch_id", default=None,
                     help="Cycle-level batch_id (e.g. rdcycle-YYYYMMDD-HHMMSS). Used by "
                          "--phase phase0 / --phase salvage / --phase discover to attribute "
                          "rows in reddit_candidates and reddit_batches. Required for "
                          "phase0 and salvage; optional for discover (defaults to a "
                          "per-discover synthetic id).")
    args = cli.parse_args()

    def _usage_error(message):
        # Report an argument problem on stderr and exit with status 2.
        print(message, file=sys.stderr)
        sys.exit(2)

    def _read_plan(path):
        # Load a plan JSON written by an earlier phase.
        with open(path) as handle:
            return json.load(handle)

    def _write_plan(payload, path):
        # Persist a plan JSON for the next phase.
        with open(path, "w") as handle:
            json.dump(payload, handle)

    config = load_config()
    reddit_username = config.get("accounts", {}).get("reddit", {}).get("username", "Deep_Ad1959")
    phase = args.phase

    if phase == "phase0":
        # The single `expired=N salvaged=M` line printed below is parsed by
        # the calling shell script, so its format must stay stable.
        if not args.batch_id:
            _usage_error("[post_reddit] ERROR: --phase phase0 requires --batch-id")
        expired, salvaged = _db_phase0_salvage(args.batch_id)
        print(f"expired={expired} salvaged={salvaged}")

    elif phase == "salvage":
        # Pull up to --limit salvage-eligible rows for this batch and write
        # them as a plan JSON that can flow through draft -> post.
        if not args.out:
            _usage_error("[post_reddit] ERROR: --phase salvage requires --out PATH")
        if not args.batch_id:
            _usage_error("[post_reddit] ERROR: --phase salvage requires --batch-id")
        salvage_limit = max(1, int(args.limit or 1))
        plan = _db_pick_salvage_candidates(args.batch_id, limit=salvage_limit)
        if not plan:
            print("[post_reddit] salvage: no eligible pending rows for this cycle")
            sys.exit(6)
        _write_plan(plan, args.out)
        urls = [d["thread_url"] for d in plan["decisions"]]
        print(f"[post_reddit] SALVAGED {plan['salvaged_count']} candidate(s) "
              f"(max attempt={plan['salvaged_attempt']}/{MAX_ATTEMPTS}) "
              f"project={plan['project_name']} urls={urls}")

    elif phase == "discover":
        if not args.out:
            _usage_error("[post_reddit] ERROR: --phase discover requires --out PATH")
        if not preflight_rate_limit():
            print("[post_reddit] rate-limited, discover skipped")
            sys.exit(3)
        excluded = [name.strip() for name in args.exclude.split(",") if name.strip()]
        plan = _discover_iteration(args, config, reddit_username, excluded)
        if plan is None:
            sys.exit(4)
        # The plan is written before the exit code is chosen, so the output
        # file exists even for the error / empty outcomes.
        _write_plan(plan, args.out)
        if plan.get("dry_run"):
            sys.exit(0)
        if plan.get("error"):
            sys.exit(5)
        if not plan.get("decisions"):
            sys.exit(6)

    elif phase == "draft":
        if not args.in_path or not os.path.exists(args.in_path):
            _usage_error(f"[post_reddit] ERROR: --phase draft requires --in PATH (got {args.in_path!r})")
        if not args.out:
            _usage_error("[post_reddit] ERROR: --phase draft requires --out PATH")
        plan = _read_plan(args.in_path)
        if not plan.get("decisions"):
            print("[post_reddit] draft: no survivors in plan, nothing to draft")
            sys.exit(6)
        plan = _draft_iteration(plan, config, reddit_username)
        _write_plan(plan, args.out)
        if plan.get("draft_error"):
            sys.exit(5)
        if not plan.get("decisions"):
            sys.exit(6)

    elif phase == "post":
        if not args.in_path or not os.path.exists(args.in_path):
            _usage_error(f"[post_reddit] ERROR: --phase post requires --in PATH (got {args.in_path!r})")
        plan = _read_plan(args.in_path)
        # Hard preflight: verify the interpreter in PYTHON can import
        # playwright before attempting any post, and fail loudly with a
        # distinct signal if it cannot. Gated on real decisions so an empty
        # plan still exits cleanly.
        if plan.get("decisions"):
            probe = subprocess.run(
                [PYTHON, "-c", "import playwright"],
                capture_output=True, text=True,
            )
            if probe.returncode != 0:
                print(f"[post_reddit] FATAL runtime_incomplete: interpreter {PYTHON!r} "
                      f"cannot import playwright — the owned Python runtime is missing or "
                      f"unprovisioned. Run the `runtime` install (action:'install') before "
                      f"posting. stderr: {(probe.stderr or '').strip()[:300]}", file=sys.stderr)
                sys.exit(3)
        try:
            posted, failed = _post_iteration(plan, reddit_username)
            print(f"[post_reddit] phase=post project={plan.get('project_name')} posted={posted} failed={failed}")
        finally:
            # Best-effort removal of the generation-trace temp file once the
            # post phase is over; any failure here is deliberately ignored.
            try:
                from generation_trace import cleanup_trace_tempfile
                cleanup_trace_tempfile(plan.get("generation_trace_path"))
            except Exception:
                pass


if __name__ == "__main__":
    main()
|