@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,2549 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Reddit browser automation functions for Social Autoposter.
|
|
3
|
+
|
|
4
|
+
Replaces multi-step Claude browser MCP calls with single Python function calls.
|
|
5
|
+
Each function does all browser work internally and returns structured JSON.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
# Post a top-level comment on a Reddit thread
|
|
9
|
+
python3 reddit_browser.py post-comment "https://old.reddit.com/r/sub/comments/abc/title/" "comment text"
|
|
10
|
+
|
|
11
|
+
# Reply to an existing comment
|
|
12
|
+
python3 reddit_browser.py reply "https://old.reddit.com/r/sub/comments/abc/title/def/" "reply text"
|
|
13
|
+
|
|
14
|
+
# Scan DM inbox for unread conversations
|
|
15
|
+
python3 reddit_browser.py unread-dms
|
|
16
|
+
|
|
17
|
+
# Read messages from a Reddit chat conversation
|
|
18
|
+
python3 reddit_browser.py read-conversation "https://www.reddit.com/chat/..."
|
|
19
|
+
|
|
20
|
+
# Send a DM in a Reddit chat
|
|
21
|
+
python3 reddit_browser.py send-dm "https://www.reddit.com/chat/..." "message text"
|
|
22
|
+
|
|
23
|
+
Requires: pip install playwright && playwright install chromium
|
|
24
|
+
|
|
25
|
+
Connects to the running reddit-agent MCP browser via CDP (Chrome DevTools Protocol)
|
|
26
|
+
to reuse the existing logged-in session.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
import atexit
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import random
|
|
33
|
+
import re
|
|
34
|
+
import subprocess
|
|
35
|
+
import sys
|
|
36
|
+
import time
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _bh_activity_log(action: str, cdp_url: str) -> None:
    """Append one line to the universal browser-activity.log (Python-CDP path coverage).

    The log path is taken from the ``BH_ACTIVITY_LOG`` environment variable and
    defaults to ``~/.claude/browser-profiles/browser-activity.log``. Each line
    records a local timestamp, this script's basename, ``action``, the current
    and parent PIDs, the CDP URL and the port parsed out of that URL.

    Args:
        action: Short label for the operation being logged.
        cdp_url: CDP endpoint in use. Empty/None is logged as ``-``.

    Best-effort: every failure is swallowed on purpose so that diagnostics can
    never break the browser operation that is being logged.
    """
    try:
        import os as _o
        import time as _t
        from pathlib import Path as _P

        _p = _P(_o.environ.get(
            "BH_ACTIVITY_LOG",
            str(_P.home() / ".claude" / "browser-profiles" / "browser-activity.log"),
        ))
        # The text between the last ":" and the next "/" is the port for URLs
        # such as http://host:9557 or ws://host:9222/devtools/... . Anything
        # that is not purely digits (a URL without a port, a bare hostname, an
        # IPv6 literal without a port) is logged as "-" instead of as garbage.
        _tail = (cdp_url or "").rsplit(":", 1)[-1].split("/")[0]
        _port = _tail if _tail.isdigit() else "-"
        _p.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8 so the write does not depend on the process locale.
        with _p.open("a", encoding="utf-8") as _f:
            _f.write(
                f"[{_t.strftime('%Y-%m-%d %H:%M:%S')}] pycdp "
                f"script={_o.path.basename(__file__)} action={action} "
                f"pid={_o.getpid()} ppid={_o.getppid()} cdp={cdp_url or '-'} "
                f"port={_port}\n"
            )
    except Exception:
        # Deliberate best-effort: logging must never fail the caller.
        pass
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
PROFILE_DIR = os.path.expanduser("~/.claude/browser-profiles/reddit")
|
|
63
|
+
LOCK_FILE = os.path.expanduser("~/.claude/reddit-agent-lock.json")
|
|
64
|
+
LOCK_EXPIRY = 300 # Must match reddit-agent-lock.sh
|
|
65
|
+
LOCK_WAIT_MAX = 45 # seconds to wait for lock to free before giving up
|
|
66
|
+
LOCK_POLL_INTERVAL = 2
|
|
67
|
+
|
|
68
|
+
# Side log for tool-layer diagnostics. Stdout would corrupt the JSON contract
|
|
69
|
+
# every CLI caller relies on; stderr is dropped on the floor by both
|
|
70
|
+
# subprocess.check_output(stderr=DEVNULL) callers AND by claude -p without
|
|
71
|
+
# --output-format stream-json. A side file is the only place these lines
|
|
72
|
+
# survive the round-trip, so verification (e.g. "did the suffix gate fire
|
|
73
|
+
# this run") becomes a cheap grep.
|
|
74
|
+
DIAG_LOG = os.path.expanduser("~/social-autoposter/skill/logs/reddit_browser_diag.log")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _diag_log(msg):
    """Append ``msg`` to the DIAG_LOG side file, prefixed with a UTC timestamp.

    The line format is ``YYYY-MM-DDTHH:MM:SSZ <msg>``. The parent directory of
    DIAG_LOG is created on demand.

    Best-effort: every failure is swallowed on purpose, because this is a
    diagnostics channel and must never break the operation being diagnosed.
    """
    try:
        from datetime import datetime, timezone

        os.makedirs(os.path.dirname(DIAG_LOG), exist_ok=True)
        # Timezone-aware "now" replaces the deprecated datetime.utcnow(); the
        # rendered text is unchanged.
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        # Explicit UTF-8: under a C/ASCII locale a non-ASCII message would
        # otherwise raise on write and the diagnostic line would be lost.
        with open(DIAG_LOG, "a", encoding="utf-8") as f:
            f.write(f"{stamp} {msg}\n")
    except Exception:
        pass
|
|
85
|
+
VIEWPORT = {"width": 911, "height": 1016}
|
|
86
|
+
|
|
87
|
+
# Load Reddit username from config.
|
|
88
|
+
# Prefers the new top-level `reddit_account.username` (2026-05-15) over the
|
|
89
|
+
# legacy `accounts.reddit.username` path. Drift between the two silently
|
|
90
|
+
# broke the post-permalink lookup on the VM (wrong username → JS finds 0
|
|
91
|
+
# matching comments → permalink=None → pipeline records `failed` despite
|
|
92
|
+
# the comment landing on Reddit).
|
|
93
|
+
_config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "config.json")
# Built-in fallback, used when config.json is absent or names no username.
OUR_USERNAME = "Deep_Ad1959"
try:
    with open(_config_path, encoding="utf-8") as f:
        _cfg = json.load(f)
except FileNotFoundError:
    # No config.json next to the package: keep the fallback silently.
    pass
except (OSError, ValueError) as _cfg_err:
    # Unreadable or malformed config. Keep the fallback, but leave a trace:
    # running with the wrong username is otherwise invisible.
    _diag_log(f"config load failed ({_config_path}): {_cfg_err!r}; "
              f"using fallback username {OUR_USERNAME}")
else:
    try:
        # New top-level `reddit_account.username` wins over the legacy
        # `accounts.reddit.username`; null sections are tolerated.
        OUR_USERNAME = (
            (_cfg.get("reddit_account") or {}).get("username")
            or ((_cfg.get("accounts") or {}).get("reddit") or {}).get("username")
            or OUR_USERNAME
        )
    except AttributeError as _cfg_err:
        # A section has an unexpected type (e.g. a list where an object is
        # expected). Keep the fallback and record why.
        _diag_log(f"config has unexpected shape ({_config_path}): {_cfg_err!r}; "
                  f"using fallback username {OUR_USERNAME}")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _ban_entry_to_slug(entry):
    """Extract the sub slug from a comment_blocked / thread_blocked entry.

    Handles both shapes: a bare string (pre-2026-05-11) and an audit dict
    {"sub": ..., "added_at": ..., "reason": ..., "project": ...}.

    Returns the stripped, lowercased slug, or None when the entry has no
    usable slug: wrong type, empty/whitespace-only string, or a dict whose
    "sub" value is missing or is not a string.
    """
    if isinstance(entry, dict):
        entry = entry.get("sub")
    # A non-string slug (null, number, nested object) must yield None rather
    # than raise, so one malformed entry cannot abort the caller's whole scan.
    if not isinstance(entry, str):
        return None
    return entry.strip().lower() or None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _load_comment_blocked_subs():
    """Return the set of subreddits (lowercased) we cannot post comments in.

    Mirrors reddit_tools._load_comment_blocked_subs so the reply path can
    pre-flight without taking that import (and its db dependency).

    Scope model (2026-05-19 cleanup): comment_blocked entries are always
    account-level. An entry whose `account` field names a different account
    than the local `reddit_account.username` is skipped, so one machine's
    bans don't suppress subs on a machine that posts as another account.
    The legacy `project` field on entries is ignored here; project-specific
    rejects live in project_search_excludes.

    Handles both ban-list shapes: bare-string entries (pre-2026-05-11) and
    audit dicts {"sub": ..., "added_at": ..., "reason": ..., "account": ...}.
    `exclusions.subreddits` entries are added unconditionally.

    Returns an empty set when the config file is missing, unreadable or not
    a JSON object. Individual malformed entries are skipped instead of
    discarding the whole list, so one bad entry cannot make the block list
    fail open.
    """
    try:
        with open(_config_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except (OSError, ValueError):
        return set()
    if not isinstance(cfg, dict):
        return set()

    account_cfg = cfg.get("reddit_account")
    current_account = account_cfg.get("username") if isinstance(account_cfg, dict) else None
    if not (isinstance(current_account, str) and current_account):
        current_account = None

    blocked = set()

    bans = cfg.get("subreddit_bans")
    entries = bans.get("comment_blocked") if isinstance(bans, dict) else None
    for entry in entries if isinstance(entries, list) else []:
        try:
            slug = _ban_entry_to_slug(entry)
        except (AttributeError, TypeError):
            slug = None  # malformed entry: skip it, keep the rest
        if not slug:
            continue
        entry_account = entry.get("account") if isinstance(entry, dict) else None
        # account missing/null (or not a string) = global: applies on every
        # account (back-compat). account set + mismatch = skip; the entry
        # belongs to a different account.
        if (isinstance(entry_account, str) and entry_account
                and current_account is not None
                and entry_account.lower() != current_account.lower()):
            continue
        blocked.add(slug)

    exclusions = cfg.get("exclusions")
    excluded = exclusions.get("subreddits") if isinstance(exclusions, dict) else None
    for name in excluded if isinstance(excluded, list) else []:
        if isinstance(name, str) and name.strip():
            blocked.add(name.strip().lower())

    return blocked
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _subreddit_from_permalink(url):
    """Return the lowercased subreddit name (without the r/ prefix) found in a Reddit URL.

    Yields None for an empty/None URL and for URLs that carry no /r/<name>
    segment.
    """
    found = re.search(r"/r/([^/?#]+)", url) if url else None
    if found is None:
        return None
    return found.group(1).lower()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def find_reddit_cdp_port():
    """Find the CDP port of the running reddit-agent MCP browser.

    Scans `ps aux` output for Chrome/Chromium processes carrying a
    remote-debugging-port flag, then queries each port's /json endpoint for
    pages whose URL contains reddit.com.

    Preference order across the candidate ports (checked in ascending order):
      1. a port with a non-login old.reddit.com page (the MCP agent browser),
      2. a port with a non-login new-reddit content page (/r/, /chat,
         /message or reddit.com/u/),
      3. any port with a reddit.com page.

    Returns:
        The chosen port as an int, or None when nothing matches or the scan
        itself fails.
    """
    import urllib.request

    try:
        ps_out = subprocess.check_output(
            ["ps", "aux"], text=True, stderr=subprocess.DEVNULL
        )
        ports = set()
        for line in ps_out.splitlines():
            lowered = line.lower()
            if "chromium" not in lowered and "chrome" not in lowered:
                continue
            m = re.search(r"remote-debugging-port=(\d+)", line)
            if m:
                ports.add(int(m.group(1)))

        new_reddit_port = None
        any_reddit_port = None
        for port in sorted(ports):
            try:
                # Context manager so the HTTP response/socket is always closed.
                with urllib.request.urlopen(
                    f"http://localhost:{port}/json", timeout=2
                ) as resp:
                    pages = json.loads(resp.read())
                reddit_urls = [
                    p.get("url", "")
                    for p in pages
                    if "reddit.com" in p.get("url", "")
                ]
                if not reddit_urls:
                    continue

                # old.reddit.com is the top preference, so the first port that
                # has it is the answer; probing the remaining ports (up to 2s
                # each) could not change the result.
                if any(
                    "old.reddit.com" in u and "login" not in u
                    for u in reddit_urls
                ):
                    return port

                # New reddit with actual content pages.
                if new_reddit_port is None and any(
                    ("/r/" in u or "/chat" in u or "/message" in u
                     or "reddit.com/u/" in u)
                    and "old.reddit.com" not in u
                    and "login" not in u
                    for u in reddit_urls
                ):
                    new_reddit_port = port

                if any_reddit_port is None:
                    any_reddit_port = port
            except Exception:
                # Port not answering / not a DevTools endpoint: try the next.
                continue

        return new_reddit_port or any_reddit_port
    except Exception:
        # Best-effort discovery: callers treat None as "no CDP browser found".
        return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
_LOCK_SESSION_ID = f"python:{os.getpid()}"
|
|
247
|
+
|
|
248
|
+
# Path to the bash-lock lease helper. Bumping this lease from inside reddit_browser.py
|
|
249
|
+
# is what shields Python-CDP pipelines (run-reddit-search, audit-reddit-resurrect,
|
|
250
|
+
# stats.sh reddit phase, engage-reddit) from the watchdog's 60-90s reclaim. Those
|
|
251
|
+
# pipelines never go through MCP, so the MCP PreToolUse heartbeat hook never fires
|
|
252
|
+
# for them. Each subprocess invocation of reddit_browser.py is a CDP step, so
|
|
253
|
+
# bumping `expires_at` on every subprocess start gives the watchdog a clear "this
|
|
254
|
+
# pipeline is alive and using the browser" signal.
|
|
255
|
+
_BASH_LEASE_HEARTBEAT_PATH = os.path.join(
|
|
256
|
+
os.path.dirname(os.path.abspath(__file__)), "reddit_browser_lock.py"
|
|
257
|
+
)
|
|
258
|
+
_BASH_LEASE_TTL_SEC = 90
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _heartbeat_bash_lease():
    """Best-effort: bump the bash-lock lease's expires_at by `_BASH_LEASE_TTL_SEC`.

    Runs the lease helper at `_BASH_LEASE_HEARTBEAT_PATH` as
    `<python> <helper> heartbeat --ttl <seconds>` with a 3s timeout, so a hung
    helper can't stall a CDP step. Output and exit status are discarded.

    Silent on every outcome (OK / NOT_HELD / HELD_BY_OTHER / errors). This is a
    pure peace-keeping signal to the watchdog, not load-bearing for correctness.

    NOT_HELD is fine: the bash lock may genuinely not be acquired (e.g. ad-hoc
    use of reddit_browser.py outside a pipeline). HELD_BY_OTHER is also fine:
    a peer holds the bash lock; we shouldn't touch their lease.
    """
    try:
        subprocess.run(
            # Use the interpreter running this module rather than whatever
            # "python3" resolves to on PATH (which may be a different
            # interpreter, or absent under a minimal service PATH). Fall back
            # to "python3" only when sys.executable is unavailable.
            [sys.executable or "python3", _BASH_LEASE_HEARTBEAT_PATH, "heartbeat",
             "--ttl", str(_BASH_LEASE_TTL_SEC)],
            capture_output=True, timeout=3, check=False,
        )
    except Exception:
        pass  # Best-effort. Never fail a CDP op because of lease bookkeeping.
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _release_browser_lock():
    """Release the lock if we hold it.

    Deletes LOCK_FILE only when its recorded session_id equals
    _LOCK_SESSION_ID. A missing, unreadable or malformed lock file is left
    alone. Never raises for those cases, which matters because this function
    is registered as an atexit handler.
    """
    try:
        # Open directly instead of checking existence first: avoids the window
        # in which the file can disappear between the check and the read.
        with open(LOCK_FILE, encoding="utf-8") as f:
            lock = json.load(f)
        # Valid JSON that is not an object (e.g. a list) has no owner we can
        # verify, so it is not ours to delete.
        if isinstance(lock, dict) and lock.get("session_id") == _LOCK_SESSION_ID:
            os.remove(LOCK_FILE)
    except (OSError, ValueError):
        # OSError: missing/unreadable file or failed delete.
        # ValueError: invalid JSON or undecodable bytes.
        pass
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
atexit.register(_release_browser_lock)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _acquire_browser_lock():
    """Wait for the Reddit browser lock to free, then acquire it.

    Polls every LOCK_POLL_INTERVAL seconds for up to LOCK_WAIT_MAX seconds.
    The lock counts as free when LOCK_FILE is missing, unreadable or
    malformed, when its timestamp is at least LOCK_EXPIRY seconds old, or
    when it is already held by this same session (re-acquiring just
    refreshes it).

    If another session still holds a fresh lock once the wait budget is
    spent, prints a JSON error object on stdout and exits with status 1; the
    launchd caller will retry on the next 5-min tick.

    On success LOCK_FILE is rewritten with our session id and the current
    time, and the bash-lock lease is bumped.
    """
    deadline = time.time() + LOCK_WAIT_MAX
    while True:
        try:
            with open(LOCK_FILE, encoding="utf-8") as f:
                lock = json.load(f)
        except (OSError, ValueError):
            break  # no lock file, or unreadable/corrupt: treat as free
        if not isinstance(lock, dict):
            break  # valid JSON but not a lock record: treat as free

        holder = lock.get("session_id", "unknown")
        if holder == _LOCK_SESSION_ID:
            # Already ours (an earlier acquire in this process). Waiting here
            # would only block the process on itself until it gave up.
            break

        try:
            age = time.time() - float(lock.get("timestamp", 0))
        except (TypeError, ValueError):
            break  # non-numeric timestamp: same as a missing one, i.e. stale
        if age >= LOCK_EXPIRY:
            break

        if time.time() >= deadline:
            print(json.dumps({
                "success": False,
                "error": f"Reddit browser locked by session {holder} ({int(age)}s); waited {LOCK_WAIT_MAX}s, giving up."
            }))
            sys.exit(1)
        time.sleep(LOCK_POLL_INTERVAL)

    # NOTE(review): check-then-write is not atomic, so two processes that see
    # a free lock at the same moment can both proceed. Left unchanged here.
    with open(LOCK_FILE, "w") as f:
        json.dump({"session_id": _LOCK_SESSION_ID, "timestamp": int(time.time())}, f)
    # Also bump the bash-lock lease so the watchdog respects this CDP burst.
    # Python-CDP pipelines never trigger the MCP PreToolUse heartbeat hook, so
    # without this call the watchdog would steal the bash lock at age >= 60s
    # during legitimate CDP work.
    _heartbeat_bash_lease()
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _refresh_browser_lock():
    """Re-stamp the lock so it does not expire during a long operation.

    Rewrites LOCK_FILE with this session's id and the current time (write
    failures are ignored), then bumps the bash-lock lease so the watchdog
    won't reclaim during multi-step CDP ops. Meant to be called from inside
    long CDP flows such as scroll loops or multi-second waits.
    """
    record = {"session_id": _LOCK_SESSION_ID, "timestamp": int(time.time())}
    try:
        with open(LOCK_FILE, "w") as handle:
            handle.write(json.dumps(record))
    except OSError:
        pass
    _heartbeat_bash_lease()
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _ctx_has_reddit_session(ctx):
    """Return True if browser context `ctx` carries a non-empty reddit_session cookie.

    A failure to read the context's cookies is treated as "no session".
    """
    try:
        cookies = ctx.cookies("https://www.reddit.com/")
    except Exception:
        cookies = []
    return any(
        c.get("name") == "reddit_session" and c.get("value") for c in cookies
    )


def _pick_reddit_tab(ctx):
    """Return the tab to drive inside `ctx`, reusing an existing one when possible.

    Preference order: a tab already on reddit.com (and not a login URL), else
    the context's first tab, else (only when the context has zero tabs) a
    newly created page. new_page() steals OS focus every call; navigating a
    background tab does not.
    """
    for pg in ctx.pages:
        url = pg.url or ""
        if "reddit.com" in url and "login" not in url:
            return pg
    if ctx.pages:
        return ctx.pages[0]
    return ctx.new_page()


def get_browser_and_page(playwright):
    """Get a logged-in Reddit page, preferring CDP-attach over launch_persistent_context.

    Acquires the browser lock first, then tries three paths in order:
      1. Harness CDP-attach: when REDDIT_CDP_URL is set, connect to that URL,
         pick the first context with a live reddit_session cookie (else the
         first context), and REUSE an existing tab in it (see _pick_reddit_tab).
      2. Legacy CDP-attach: when find_reddit_cdp_port() returns a port, connect
         to http://localhost:<port> and use the first context that has a live
         reddit_session cookie, again reusing an existing tab.
      3. launch_persistent_context fallback against PROFILE_DIR: when CDP isn't
         available OR contexts have no reddit_session (laptop where
         reddit-agent MCP isolates its session in an invisible context).

    Why CDP-attach matters: appmaker's visible Chromium permanently holds
    /root/.chromium-profile. launch_persistent_context collides on profile leveldb
    locks, loads a partial session, and EVERY post returns account_blocked_in_sub
    because the comment form never renders. Attaching to the live context dodges
    the collision entirely.

    Args:
        playwright: the object yielded by sync_playwright().

    Returns:
        (browser, page, is_cdp). When is_cdp=True, `browser` is the CDP-attached
        browser and callers must NOT close the page's context or the browser;
        doing so would kill the user's visible session. When is_cdp=False,
        `browser` is the persistent context returned by
        launch_persistent_context (not a Browser object) and the caller owns it.

    Exits the process (sys.exit(1)) after printing a JSON error if the
    persistent-context launch keeps failing for LOCK_WAIT_MAX seconds.
    """
    _acquire_browser_lock()

    # Preferred: explicit harness CDP endpoint (REDDIT_CDP_URL, set by
    # skill/lib/reddit-backend.sh -> http://127.0.0.1:9557). When present we
    # attach DIRECTLY to that URL before trying the ps-based port scan.
    # This is the reddit-harness migration path (2026-05-29): the whole Reddit
    # pipeline rides a dedicated browser-harness Chrome on port 9557, profile
    # ~/.claude/browser-profiles/reddit-harness, mirroring twitter-harness.
    # Mirrors twitter's TWITTER_CDP_URL direct-attach pattern.
    cdp_url_env = (os.environ.get("REDDIT_CDP_URL") or "").strip()
    if cdp_url_env:
        try:
            cdp_browser = playwright.chromium.connect_over_cdp(cdp_url_env)
            _bh_activity_log("attach_harness", cdp_url_env)
            # Prefer a context that already carries a live reddit_session; else
            # fall back to the first context (harness is single-profile, logged in).
            chosen = None
            for ctx in cdp_browser.contexts:
                if _ctx_has_reddit_session(ctx):
                    chosen = ctx
                    break
            if chosen is None and cdp_browser.contexts:
                chosen = cdp_browser.contexts[0]
            if chosen is not None:
                # The caller navigates the tab and leaves it open (finally
                # blocks don't close CDP tabs).
                return cdp_browser, _pick_reddit_tab(chosen), True
            # No usable context: do NOT close the CDP browser (would kill the
            # harness Chrome); just fall through to the next path.
        except Exception:
            # Any attach failure falls through to the legacy port scan.
            pass

    cdp_port = find_reddit_cdp_port()

    if cdp_port:
        try:
            cdp_browser = playwright.chromium.connect_over_cdp(f"http://localhost:{cdp_port}")
            _bh_activity_log("attach_legacy", f"http://localhost:{cdp_port}")
            for ctx in cdp_browser.contexts:
                if _ctx_has_reddit_session(ctx):
                    # Reuse an existing tab (no focus-steal); only new_page if none.
                    return cdp_browser, _pick_reddit_tab(ctx), True
            # No context is logged in to Reddit: drop this CDP connection.
            try:
                cdp_browser.close()
            except Exception:
                pass
        except Exception:
            # Any attach failure falls through to the persistent-context launch.
            pass

    # Fallback: launch our own persistent context against PROFILE_DIR.
    # Retry on Chromium SingletonLock collisions (MCP holds the OS-level profile
    # lock for its entire server lifetime; the JSON lock can expire while the
    # OS lock is still held).
    deadline = time.time() + LOCK_WAIT_MAX
    while True:
        try:
            context = playwright.chromium.launch_persistent_context(
                PROFILE_DIR,
                headless=True,
                args=["--disable-blink-features=AutomationControlled"],
                viewport=VIEWPORT,
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
            )
            break
        except Exception as e:
            if time.time() >= deadline:
                # Give the JSON lock back before exiting so other sessions
                # don't wait for it to expire.
                _release_browser_lock()
                print(json.dumps({
                    "success": False,
                    "error": f"chromium profile locked by another process; waited {LOCK_WAIT_MAX}s: {e}"
                }))
                sys.exit(1)
            time.sleep(LOCK_POLL_INTERVAL)
    page = context.new_page()
    return context, page, False
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def _to_old_reddit(url):
|
|
479
|
+
"""Convert any reddit URL to old.reddit.com."""
|
|
480
|
+
url = re.sub(r"https?://(www\.)?reddit\.com", "https://old.reddit.com", url)
|
|
481
|
+
# Remove trailing query params that old reddit doesn't use
|
|
482
|
+
url = re.sub(r"\?.*$", "", url)
|
|
483
|
+
return url
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _ensure_old_reddit(page):
|
|
487
|
+
"""If page redirected to new reddit, navigate to old.reddit.com equivalent."""
|
|
488
|
+
current = page.url
|
|
489
|
+
if "old.reddit.com" in current:
|
|
490
|
+
return
|
|
491
|
+
if "reddit.com" in current and "old.reddit.com" not in current:
|
|
492
|
+
old_url = _to_old_reddit(current)
|
|
493
|
+
page.goto(old_url, wait_until="domcontentloaded")
|
|
494
|
+
page.wait_for_timeout(3000)
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def post_comment(thread_url, text):
    """Post a top-level comment on a Reddit thread.

    Navigates to old.reddit.com thread, finds the comment textarea,
    types the comment text, and submits.

    Args:
        thread_url: URL of the thread; converted with _to_old_reddit() before
            navigation.
        text: comment body filled into the textarea verbatim.

    Returns:
        On success: {"ok": True, "permalink": ..., "thread_url": ...,
        "final_url": ...}. `permalink` is the href of the last comment on the
        page authored by OUR_USERNAME and may be None when no such comment was
        found but the page URL changed after submit.
        On failure: {"ok": False, "error": <code>} where <code> is one of
        thread_not_found, thread_archived, thread_locked, not_logged_in,
        account_blocked_in_sub, comment_box_not_found, save_button_not_found,
        no_redirect_after_submit (this one also carries thread_url and
        final_url), or the stripped text of an on-page error element.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            old_url = _to_old_reddit(thread_url)
            page.goto(old_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3000)
            _ensure_old_reddit(page)

            # Check if thread exists using visible content only (old reddit hides
            # template strings like "there doesn't seem to be anything here" in the
            # page markup on every page, so text_content("body") gives false positives).
            content_el = page.locator("#siteTable, .sitetable.linklisting").first
            try:
                content_el.wait_for(state="attached", timeout=5000)
            except Exception:
                return {"ok": False, "error": "thread_not_found"}

            # A real 404 page shows an interstitial with class "interstitial"
            if page.locator(".interstitial").count() > 0:
                interstitial_text = page.locator(".interstitial").first.text_content() or ""
                if "page not found" in interstitial_text.lower():
                    return {"ok": False, "error": "thread_not_found"}
                if "this is an archived post" in interstitial_text.lower():
                    return {"ok": False, "error": "thread_archived"}

            # Check if the THREAD itself is locked. Must be scoped to the OP
            # container (`#siteTable .thing.self`). A bare `.locked-tagline`
            # lookup matches the per-comment "locked comment" badge that subs
            # like r/selfhosted use on their stickied moderator comments, which
            # caused false-positive thread_locked errors for ~3 NightOwl posts
            # on 2026-05-18 against perfectly open threads.
            if page.locator("#siteTable .thing.self .locked-tagline").count() > 0:
                return {"ok": False, "error": "thread_locked"}

            # Check if we're actually logged in. Only the login-redirect signal
            # (the substring "login" in the current URL) is tested here.
            if "login" in page.url.lower():
                return {"ok": False, "error": "not_logged_in"}

            # Check if the top-level comment form exists at all.
            # When the sub gates top-level commenting on this account (CrowdControl,
            # AutoMod karma/age threshold, mod-approved-only, shadowban), old reddit
            # silently omits the form for us while still rendering the rest of the
            # page. The sub itself may be public; the gate is account-level. There
            # is no error banner and no API field that exposes this, so the only
            # signal is the missing form on a logged-in page load.
            has_comment_form = page.locator(
                ".commentarea .usertext.cloneable, .commentarea > form.usertext"
            ).count() > 0
            if not has_comment_form:
                return {"ok": False, "error": "account_blocked_in_sub"}

            # Some subs render the form but show a gate notice instead of a usable
            # textarea (CrowdControl on AutoMod-flagged users, "you must have X karma
            # to comment in r/sub", subreddit quarantine consent, etc.). Detect these
            # before burning 5+3s of textarea polling. The "infobar" / "md-container"
            # banner above .commentarea carries the gate text. We pattern-match a few
            # well-known phrases so we return early with the correct error.
            gate_phrases = [
                "you must be a subscriber",
                "you don't have permission to comment",
                "you must have at least",
                "minimum karma",
                "minimum account age",
                "crowdcontrol",
                "this community has restricted",
                "verified email",
                "only approved users",
                "you must agree",  # quarantine consent
            ]
            try:
                # NOTE(review): `.first` takes whichever of these selectors
                # matches first; if that is .commentarea, its text presumably
                # includes existing comment bodies, so a gate phrase quoted in
                # a comment could trigger this early return. Confirm.
                preamble = (page.locator(
                    ".commentarea, .infobar, .md-container, .interstitial"
                ).first.text_content(timeout=1500) or "").lower()
                # `p` below is local to the generator expression; it does not
                # rebind the Playwright handle `p` from the enclosing `with`.
                if any(p in preamble for p in gate_phrases):
                    return {"ok": False, "error": "account_blocked_in_sub"}
            except Exception:
                # Best-effort probe: a timeout or missing element just means
                # we proceed to the textarea lookup.
                pass

            # Find the top-level comment form textarea.
            comment_form = page.locator(
                ".commentarea > form.usertext textarea, "
                ".commentarea > .usertext-edit textarea, "
                ".commentarea > .usertext textarea"
            ).first

            try:
                comment_form.wait_for(state="visible", timeout=5000)
            except Exception:
                # Broader fallback: the first textarea anywhere in the comment
                # area. NOTE(review): this selector is not restricted to
                # top-level forms, so it could match a reply form textarea.
                try:
                    comment_form = page.locator(
                        ".commentarea textarea"
                    ).first
                    comment_form.wait_for(state="visible", timeout=3000)
                except Exception:
                    return {"ok": False, "error": "comment_box_not_found"}

            # Even if the textarea is "visible", a sub may render it disabled or
            # with a readonly attribute (some quarantined / restricted-mode subs do
            # this). Fail fast as account_blocked_in_sub so salvage doesn't keep
            # retrying the same dead thread.
            try:
                is_disabled = comment_form.evaluate(
                    "el => !!(el.disabled || el.readOnly || "
                    "el.getAttribute('aria-disabled') === 'true' || "
                    "el.closest('.disabled,.usertext-disabled'))"
                )
                if is_disabled:
                    return {"ok": False, "error": "account_blocked_in_sub"}
            except Exception:
                pass

            # Fill the textarea (old reddit uses standard textareas)
            comment_form.fill(text)
            page.wait_for_timeout(1000)

            # Click the save/submit button
            save_btn = page.locator(
                ".commentarea button.save[type='submit'], "
                ".commentarea > form.usertext button[type='submit'], "
                ".commentarea > .usertext button[type='submit'], "
                ".commentarea > .usertext-edit button[type='submit']"
            ).first

            try:
                save_btn.wait_for(state="visible", timeout=3000)
                save_btn.click()
            except Exception:
                # Fallback: find any visible save button in the comment area
                try:
                    save_btn = page.locator(
                        ".commentarea button:has-text('save')"
                    ).first
                    save_btn.click()
                except Exception:
                    return {"ok": False, "error": "save_button_not_found"}

            # Fixed pause to let the submit round-trip finish.
            page.wait_for_timeout(5000)

            # Check for errors (rate limit, etc.)
            # NOTE(review): only the FIRST matching element is inspected; if it
            # is hidden, a later visible error element would be missed. Confirm.
            error_el = page.locator(".status.error, .error").first
            try:
                if error_el.is_visible():
                    error_text = error_el.text_content() or "unknown_error"
                    return {"ok": False, "error": error_text.strip()}
            except Exception:
                pass

            # Capture the final URL. On a successful submit, old.reddit
            # redirects to the new permalink (.../comments/<thread>/.../<comment_id>/),
            # so a URL still equal to the thread URL is a strong signal the
            # comment never landed (silent shadow-reject / anti-spam).
            try:
                final_url = page.url
            except Exception:
                final_url = ""

            # Try to find the permalink of our new comment
            permalink = page.evaluate("""(ourUsername) => {
                // Find comments by our username, get the last one (most recent)
                const authorLinks = document.querySelectorAll(
                    '.comment a.author[href*="/' + ourUsername + '"]'
                );
                if (authorLinks.length === 0) return null;
                const lastAuthor = authorLinks[authorLinks.length - 1];
                // Walk up to the .comment container
                let comment = lastAuthor.closest('.comment');
                if (!comment) return null;
                // Find the permalink
                const perma = comment.querySelector('a.bylink[href*="/comments/"]');
                if (perma) return perma.getAttribute('href');
                return null;
            }""", OUR_USERNAME)

            if not permalink:
                # Dump HTML + screenshot so we can post-mortem (silent shadow-reject
                # vs slow render vs DOM selector miss). final_url tells us which.
                # Files go to <parent of this file's directory>/log/; the HTML
                # dump is capped at the first 200000 characters.
                try:
                    debug_dir = os.path.join(
                        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
                        "log",
                    )
                    os.makedirs(debug_dir, exist_ok=True)
                    stamp = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
                    base = os.path.join(
                        debug_dir,
                        f"reddit_browser_post_fail_{stamp}_{os.getpid()}",
                    )
                    try:
                        html = page.content()[:200000]
                        with open(base + ".html", "w") as f:
                            f.write(html)
                    except Exception:
                        pass
                    try:
                        page.screenshot(path=base + ".png", full_page=False)
                    except Exception:
                        pass
                    print(
                        f"[reddit_browser] post-comment no permalink; "
                        f"final_url={final_url} thread_url={thread_url} "
                        f"dump={base}",
                        file=sys.stderr,
                    )
                except Exception:
                    # Diagnostics are best-effort and must never mask the result.
                    pass

                # If the URL never redirected away from the thread page, the
                # submit didn't take. Surface as an explicit error so callers
                # can distinguish this from "submitted but slow render".
                try:
                    norm_old = _to_old_reddit(thread_url).rstrip("/")
                    norm_final = (final_url or "").rstrip("/")
                except Exception:
                    norm_old = thread_url
                    norm_final = final_url or ""
                if norm_final == norm_old:
                    return {
                        "ok": False,
                        "error": "no_redirect_after_submit",
                        "thread_url": thread_url,
                        "final_url": final_url,
                    }

            return {
                "ok": True,
                "permalink": permalink,
                "thread_url": thread_url,
                "final_url": final_url,
            }

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so never close
            # the page here: closing it forces the next run to new_page(), which
            # steals OS focus from whatever app is in front. Mirrors twitter_browser
            # (only the persistent-context fallback closes). cleanup_harness_tabs at
            # cycle start bounds tab count to one.
            try:
                if not is_cdp:
                    page.context.close()
            except Exception:
                pass
            if not is_cdp:
                # In the non-CDP path get_browser_and_page() returns the
                # persistent context as `browser`, so this closes that context.
                try:
                    browser.close()
                except Exception:
                    pass
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
def reply_to_comment(comment_permalink, text, dm_id=None):
    """Reply to an existing Reddit comment.

    Navigates to the comment permalink on old.reddit.com, clicks the
    "reply" link to expand the reply box, fills in the text, and submits.

    Active Reddit campaigns with a `suffix` are applied at this tool layer:
    the suffix is appended to `text` (per `sample_rate` coin flip per
    campaign) before typing, so the literal text is guaranteed to be
    delivered. When `dm_id` is provided, after a verified post the message
    is logged via _log_dm_outbound so dm_messages.campaign_id
    auto-attributes via suffix detection (single source of truth).

    Args:
        comment_permalink: URL of the comment to reply to.
        text: reply body. When dm_id is given, URLs in it are first rewritten
            by dm_short_links.wrap_text and campaign suffixes may be appended.
        dm_id: optional DM conversation id. Enables link wrapping, suffix
            injection, outbound logging, and auto-closing the conversation
            when the subreddit is on the comment-blocked list.

    Returns:
        On a submitted reply: {"ok": True, "verified": bool,
        "comment_permalink": ..., "reply_text": ..., "applied_campaigns": [...],
        "minted_link_codes": [...]}. Note "ok" is True even when `verified`
        is False (no comment by OUR_USERNAME was found after submit).
        When we already replied: {"ok": True, "already_replied": True,
        "existing_text": ..., "existing_url": ..., "comment_permalink": ...}.
        On failure: {"ok": False, "error": <code>, ...} where <code> is one of
        subreddit_blocked (with "subreddit" and "auto_closed"),
        link_wrap_failed (with "wrap_error", "needed_project", "url"),
        comment_not_found, reply_link_not_found, reply_textarea_not_found,
        reply_save_button_not_found, or the text of an on-page error element.
    """
    from playwright.sync_api import sync_playwright

    # Pre-flight: refuse to attempt a reply in a sub we know we can't comment in.
    # Without this gate, an inbound that landed in `dms` while the sub was still
    # allowed will keep cycling through the engage-dm-replies pipeline, fail with
    # `reply_link_not_found`, and trigger a needs_human escalation every run.
    sub = _subreddit_from_permalink(comment_permalink)
    if sub and sub in _load_comment_blocked_subs():
        auto_closed = False
        if dm_id is not None:
            try:
                proc = subprocess.run(
                    ["python3",
                     os.path.join(os.path.dirname(os.path.abspath(__file__)), "dm_conversation.py"),
                     "set-status", "--dm-id", str(dm_id), "--status", "closed"],
                    capture_output=True, text=True, timeout=20,
                )
                # Only report the conversation as closed when the CLI actually
                # succeeded; a non-zero exit does not raise on its own.
                auto_closed = proc.returncode == 0
                if not auto_closed:
                    print(
                        f"[reply_to_comment] auto-close failed for dm_id={dm_id}: "
                        f"exit {proc.returncode}: {(proc.stderr or '').strip()}",
                        file=sys.stderr,
                    )
            except Exception as e:
                print(f"[reply_to_comment] auto-close failed for dm_id={dm_id}: {e}",
                      file=sys.stderr)
        return {
            "ok": False,
            "error": "subreddit_blocked",
            "subreddit": sub,
            "auto_closed": auto_closed,
        }

    # Tool-level URL wrap pass: every URL in the model's reply gets minted
    # through dm_short_links.wrap_text so clicks attribute to this DM. Runs
    # BEFORE campaign-suffix injection so suffixes (which are short literals,
    # not URLs) aren't fed back through the wrapper. Refuses if any URL points
    # at a project not in dms.target_projects[]; the engage pipeline is expected
    # to set-target-project --append and retry.
    minted_link_codes = []
    if dm_id is not None:
        from dm_short_links import wrap_text as _wrap_text  # local import: avoid import cost when dm_id is None
        wrap_res = _wrap_text(dm_id=dm_id, text=text)
        if not wrap_res.get("ok"):
            return {
                "ok": False,
                "error": "link_wrap_failed",
                "wrap_error": wrap_res.get("error"),
                "needed_project": wrap_res.get("needed_project"),
                "url": wrap_res.get("url"),
            }
        text = wrap_res["text"]
        minted_link_codes = wrap_res.get("minted_codes", [])

    # Tool-level campaign suffix injection (mirrors send_dm), gated on dm_id.
    # The DM-replies pipeline passes dm_id and relies on this layer to
    # guarantee the suffix is delivered. The standalone reply pipeline
    # (engage_reddit.py) runs its OWN pre-append at the engage_reddit layer
    # and does NOT pass dm_id, so we skip injection here to avoid a second
    # coin flip stacking on top of the first (which would push the effective
    # tag rate to ~1-(1-r)^2 and burn the campaign budget faster than
    # intended). The endsWith guard is still useful when engage_reddit's
    # gate fired: surface the cid so the caller can bump if desired.
    applied_campaigns = []
    if dm_id is not None:
        for cid, suffix, sample_rate in _load_active_reddit_campaigns_for_dm():
            if random.random() < sample_rate:
                text = text + suffix
                applied_campaigns.append(cid)
    else:
        for cid, suffix, _ in _load_active_reddit_campaigns_for_dm():
            if suffix and text.endswith(suffix):
                applied_campaigns.append(cid)
    _diag_msg = f"[reply_to_comment] applied_campaigns={applied_campaigns} minted_links={minted_link_codes} text_len={len(text)} dm_id={dm_id}"
    print(_diag_msg, file=sys.stderr)
    _diag_log(_diag_msg)

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            old_url = _to_old_reddit(comment_permalink)
            # Don't add ?context= — it shifts the target comment up
            page.goto(old_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3000)
            _ensure_old_reddit(page)

            # Check if comment exists
            page_text = page.text_content("body") or ""
            if "page not found" in page_text.lower():
                return {"ok": False, "error": "comment_not_found"}

            # Dedup: check if we already replied to this specific comment
            already = page.evaluate("""(ourUsername) => {
                // Find the target comment (highlighted or first in nested listing)
                const target = document.querySelector(
                    '.nestedlisting > .comment, .comment.target'
                );
                if (!target) return null;
                // Check direct child replies for our username
                const childComments = target.querySelectorAll(
                    ':scope > .child .comment'
                );
                for (const c of childComments) {
                    const author = c.querySelector('a.author');
                    if (author && author.textContent.trim() === ourUsername) {
                        const body = c.querySelector('.usertext-body');
                        const perma = c.querySelector('a.bylink');
                        return {
                            already_replied: true,
                            text: body ? body.textContent.trim() : '',
                            url: perma ? perma.getAttribute('href') : '',
                        };
                    }
                }
                return null;
            }""", OUR_USERNAME)

            if already and already.get("already_replied"):
                return {
                    "ok": True,
                    "already_replied": True,
                    "existing_text": already.get("text", ""),
                    "existing_url": already.get("url", ""),
                    "comment_permalink": comment_permalink,
                }

            # Click the reply link on the target comment
            reply_clicked = False

            # Strategy 1: Find the target/highlighted comment's reply link
            try:
                reply_link = page.locator(
                    ".nestedlisting > .comment .flat-list a:has-text('reply'), "
                    ".comment.target .flat-list a:has-text('reply')"
                ).first
                reply_link.wait_for(state="visible", timeout=5000)
                reply_link.click()
                reply_clicked = True
            except Exception:
                pass

            # Strategy 2: If only one comment visible, use its reply link
            if not reply_clicked:
                try:
                    reply_link = page.locator(
                        ".comment .flat-list a:has-text('reply')"
                    ).first
                    reply_link.wait_for(state="visible", timeout=3000)
                    reply_link.click()
                    reply_clicked = True
                except Exception:
                    pass

            if not reply_clicked:
                return {"ok": False, "error": "reply_link_not_found"}

            page.wait_for_timeout(1000)

            # Find the reply textarea that just appeared (pick the visible one)
            reply_box = None
            all_ta = page.locator(".comment .usertext-edit textarea")
            for i in range(all_ta.count()):
                if all_ta.nth(i).is_visible():
                    reply_box = all_ta.nth(i)
                    break

            if not reply_box:
                return {"ok": False, "error": "reply_textarea_not_found"}

            # Fill the reply text
            reply_box.fill(text)
            page.wait_for_timeout(1000)

            # Click the first visible submit button among the comment edit forms
            save_btn = None
            all_btns = page.locator(
                ".comment .usertext-edit button[type='submit']"
            )
            for i in range(all_btns.count()):
                if all_btns.nth(i).is_visible():
                    save_btn = all_btns.nth(i)
                    break

            if not save_btn:
                return {"ok": False, "error": "reply_save_button_not_found"}

            save_btn.click()

            # Fixed pause to let the submit round-trip finish.
            page.wait_for_timeout(5000)

            # Check for errors
            error_el = page.locator(".status.error, .error").first
            try:
                if error_el.is_visible():
                    error_text = error_el.text_content() or "unknown_error"
                    return {"ok": False, "error": error_text.strip()}
            except Exception:
                pass

            # Verify: check if our comment appeared. This only checks that SOME
            # comment on the page is authored by OUR_USERNAME.
            verified = page.evaluate("""(ourUsername) => {
                const authorLinks = document.querySelectorAll(
                    '.comment a.author[href*="/' + ourUsername + '"]'
                );
                return authorLinks.length > 0;
            }""", OUR_USERNAME)

            # When invoked from the DM-replies pipeline (dm_id provided), log
            # the outbound through the canonical CLI so dm_messages.campaign_id
            # auto-attributes via the suffix-detection path. Mirrors send_dm.
            if verified and dm_id is not None:
                _log_dm_outbound("", text, dm_id=dm_id, minted_codes=minted_link_codes)

            return {
                "ok": True,
                "verified": verified,
                "comment_permalink": comment_permalink,
                "reply_text": text,
                "applied_campaigns": applied_campaigns,
                "minted_link_codes": minted_link_codes,
            }

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so never close
            # the page here: closing it forces the next run to new_page(), which
            # steals OS focus from whatever app is in front. Mirrors twitter_browser
            # (only the persistent-context fallback closes). cleanup_harness_tabs at
            # cycle start bounds tab count to one.
            try:
                if not is_cdp:
                    page.context.close()
            except Exception:
                pass
            if not is_cdp:
                # In the non-CDP path get_browser_and_page() returns the
                # persistent context as `browser`, so this closes that context.
                try:
                    browser.close()
                except Exception:
                    pass
|
|
1008
|
+
|
|
1009
|
+
|
|
1010
|
+
def edit_comment(comment_permalink, new_text):
    """Edit an existing Reddit comment through the old.reddit.com UI.

    Opens the comment permalink on old.reddit.com, clicks the comment's own
    "edit" link, replaces the textarea content with ``new_text`` and clicks
    save. Afterwards the first 50 characters of ``new_text`` are searched for
    in the comment's rendered body to populate ``verified``.

    Args:
        comment_permalink: URL of the comment to edit (converted with
            ``_to_old_reddit`` before navigation).
        new_text: Full replacement text for the comment.

    Returns:
        ``{"ok": True, "verified": bool, "comment_permalink": ...}`` once the
        save button has been clicked; ``verified`` tells whether the new text
        was found in the comment body afterwards. On failure
        ``{"ok": False, "error": code}`` where ``code`` is one of
        ``comment_not_found``, ``target_comment_not_found``,
        ``edit_link_not_found``, ``edit_textarea_not_found`` or
        ``edit_save_button_not_found``.
    """
    from playwright.sync_api import sync_playwright

    def _first_visible(matches):
        # Return the first visible element matched by a locator, or None.
        for idx in range(matches.count()):
            candidate = matches.nth(idx)
            if candidate.is_visible():
                return candidate
        return None

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            old_url = _to_old_reddit(comment_permalink)
            page.goto(old_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3000)
            _ensure_old_reddit(page)

            page_text = page.text_content("body") or ""
            if "page not found" in page_text.lower():
                return {"ok": False, "error": "comment_not_found"}

            # On a permalink page the target is the top-level comment of the
            # nested listing; fall back to the comment carrying .target.
            target_comment = page.locator(".nestedlisting > .comment").first
            try:
                target_comment.wait_for(state="visible", timeout=5000)
            except Exception:
                target_comment = page.locator(".comment.target").first
                try:
                    target_comment.wait_for(state="visible", timeout=3000)
                except Exception:
                    return {"ok": False, "error": "target_comment_not_found"}

            # ":scope >" keeps the match inside the target's own entry so the
            # edit link of a nested child comment is never clicked.
            try:
                edit_link = target_comment.locator(
                    ":scope > .entry .flat-list a:has-text('edit')"
                ).first
                edit_link.wait_for(state="visible", timeout=5000)
                edit_link.click()
            except Exception:
                return {"ok": False, "error": "edit_link_not_found"}

            page.wait_for_timeout(1000)

            edit_box = _first_visible(target_comment.locator(
                ":scope > .entry .usertext-edit textarea"
            ))
            if edit_box is None:
                return {"ok": False, "error": "edit_textarea_not_found"}

            # fill() replaces whatever text the textarea currently holds.
            edit_box.fill(new_text)
            page.wait_for_timeout(1000)

            save_btn = _first_visible(target_comment.locator(
                ":scope > .entry .usertext-edit button[type='submit']"
            ))
            if save_btn is None:
                return {"ok": False, "error": "edit_save_button_not_found"}

            save_btn.click()

            page.wait_for_timeout(4000)

            # Verify the edit inside the same comment, addressed by its
            # data-fullname when available.
            target_id = target_comment.get_attribute("data-fullname") or ""
            # The script's `a && b && c` yields the first falsy operand
            # (null or ""), so coerce the result to a real boolean.
            verified = bool(page.evaluate("""([newTextStart, targetId]) => {
                let comment;
                if (targetId) {
                    comment = document.querySelector(
                        '.comment[data-fullname="' + targetId + '"]'
                    );
                } else {
                    comment = document.querySelector(
                        '.nestedlisting > .comment'
                    );
                }
                if (!comment) return false;
                const body = comment.querySelector(
                    ':scope > .entry .usertext-body'
                );
                return body && body.textContent &&
                    body.textContent.includes(newTextStart);
            }""", [new_text[:50], target_id]))

            return {
                "ok": True,
                "verified": verified,
                "comment_permalink": comment_permalink,
            }

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so never
            # close the page here: closing it forces the next run to
            # new_page(), which steals OS focus from whatever app is in front.
            # Mirrors twitter_browser (only the persistent-context fallback
            # closes). cleanup_harness_tabs at cycle start bounds tab count to
            # one. Cleanup is best-effort, so close errors are ignored.
            if not is_cdp:
                try:
                    page.context.close()
                except Exception:
                    pass
                try:
                    browser.close()
                except Exception:
                    pass
|
|
1143
|
+
|
|
1144
|
+
|
|
1145
|
+
def edit_thread(thread_permalink, new_body):
    """Edit the selftext of a Reddit thread we authored.

    Used by the campaign system to append a literal suffix to original
    threads after submit. Only works on selftext posts (link posts have no
    edit link). Mirrors edit_comment but targets the main post
    (``#siteTable .thing.self``) instead of a nested comment.

    Because the interesting part of an appended suffix is the end of the
    text, verification searches the rendered body for the LAST 50 characters
    of ``new_body``.

    Args:
        thread_permalink: URL of the thread (converted with
            ``_to_old_reddit`` before navigation).
        new_body: Full replacement selftext.

    Returns:
        ``{"ok": True, "verified": bool, "thread_permalink": ...}`` once the
        save button has been clicked, or ``{"ok": False, "error": code}``
        where ``code`` is one of ``thread_not_found``,
        ``edit_link_not_found``, ``edit_textarea_not_found`` or
        ``edit_save_button_not_found``.
    """
    from playwright.sync_api import sync_playwright

    def _first_visible(matches):
        # Return the first visible element matched by a locator, or None.
        for idx in range(matches.count()):
            candidate = matches.nth(idx)
            if candidate.is_visible():
                return candidate
        return None

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            old_url = _to_old_reddit(thread_permalink)
            page.goto(old_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3000)
            _ensure_old_reddit(page)

            page_text = page.text_content("body") or ""
            if "page not found" in page_text.lower():
                return {"ok": False, "error": "thread_not_found"}

            target = page.locator("#siteTable .thing.self").first
            try:
                target.wait_for(state="visible", timeout=5000)
            except Exception:
                return {"ok": False, "error": "thread_not_found"}

            try:
                edit_link = target.locator(
                    ":scope .entry .flat-list a:has-text('edit')"
                ).first
                edit_link.wait_for(state="visible", timeout=5000)
                edit_link.click()
            except Exception:
                return {"ok": False, "error": "edit_link_not_found"}

            page.wait_for_timeout(1000)

            edit_box = _first_visible(target.locator(
                ":scope .entry .usertext-edit textarea"
            ))
            if edit_box is None:
                return {"ok": False, "error": "edit_textarea_not_found"}

            # fill() replaces whatever text the textarea currently holds.
            edit_box.fill(new_body)
            page.wait_for_timeout(1000)

            save_btn = _first_visible(target.locator(
                ":scope .entry .usertext-edit button[type='submit']"
            ))
            if save_btn is None:
                return {"ok": False, "error": "edit_save_button_not_found"}

            save_btn.click()
            page.wait_for_timeout(4000)

            # new_body[-50:] already yields the whole string when it is
            # shorter than 50 characters. The script's `a && b && c` yields
            # the first falsy operand (null or ""), so coerce to bool to
            # honour the documented "verified": bool.
            verified = bool(page.evaluate("""(newTextStart) => {
                const t = document.querySelector('#siteTable .thing.self');
                if (!t) return false;
                const body = t.querySelector('.entry .usertext-body');
                return body && body.textContent &&
                    body.textContent.includes(newTextStart);
            }""", new_body[-50:]))

            return {
                "ok": True,
                "verified": verified,
                "thread_permalink": thread_permalink,
            }

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so never
            # close the page here: closing it forces the next run to
            # new_page(), which steals OS focus from whatever app is in front.
            # Mirrors twitter_browser (only the persistent-context fallback
            # closes). cleanup_harness_tabs at cycle start bounds tab count to
            # one. Cleanup is best-effort, so close errors are ignored.
            if not is_cdp:
                try:
                    page.context.close()
                except Exception:
                    pass
                try:
                    browser.close()
                except Exception:
                    pass
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def unread_dms():
    """Collect Reddit inbox messages and chat rooms, one entry per author.

    Two pages are scraped in order:

    1. ``old.reddit.com/message/unread/`` - every ``.message`` element with
       an author becomes an entry with keys ``author``, ``subject``,
       ``preview_short`` (the message body text), ``time``, ``thread_url``
       and ``type`` (``"comment_reply"`` when the message links to a
       ``/comments/`` thread, otherwise ``"pm"``).
    2. ``www.reddit.com/chat`` - every chat-room link becomes an entry with
       keys ``author``, ``subject``, ``preview``, ``time`` (always empty),
       ``thread_url``, ``type`` (``"chat"``) and ``has_unread``. Rooms are
       listed whether or not an unread badge was found.

    Note that inbox entries carry ``preview_short`` while chat entries carry
    ``preview``.

    Returns: list of entry dicts, de-duplicated on the lower-cased author
    (the first entry seen for an author wins; entries without an author are
    dropped).
    """
    from playwright.sync_api import sync_playwright

    inbox_script = """() => {
        const results = [];
        const messages = document.querySelectorAll('.message');
        for (const msg of messages) {
            // Author
            const authorEl = msg.querySelector('.author');
            const author = authorEl ? authorEl.textContent.trim() : '';

            // Subject
            const subjectEl = msg.querySelector('a.title, .subject a');
            const subject = subjectEl ? subjectEl.textContent.trim() : '';

            // Body preview
            const bodyEl = msg.querySelector('.md');
            const body = bodyEl ? bodyEl.textContent.trim() : '';

            // Time
            const timeEl = msg.querySelector('time, .live-timestamp');
            const time = timeEl
                ? (timeEl.getAttribute('title') || timeEl.textContent.trim())
                : '';

            // Detect comment replies vs actual PMs
            // Comment replies link to /comments/ threads in the subject
            const commentLink = msg.querySelector('a[href*="/comments/"]');
            const isCommentReply = !!commentLink;

            let threadUrl = '';
            let msgType = 'pm';

            if (isCommentReply) {
                // Comment reply: extract the thread permalink
                msgType = 'comment_reply';
                const href = commentLink.getAttribute('href') || '';
                threadUrl = href.startsWith('http')
                    ? href
                    : 'https://old.reddit.com' + href;
            } else {
                // Actual PM: use the message's own permalink
                const permaLink = msg.querySelector(
                    'a.bylink, a[data-event-action="permalink"]'
                );
                if (permaLink) {
                    const href = permaLink.getAttribute('href') || '';
                    threadUrl = href.startsWith('http')
                        ? href
                        : 'https://old.reddit.com' + href;
                }
            }

            if (author) {
                results.push({
                    author: author,
                    subject: subject,
                    preview_short: body,
                    time: time,
                    thread_url: threadUrl,
                    type: msgType,
                });
            }
        }
        return results;
    }"""

    chat_script = """() => {
        const results = [];
        const links = document.querySelectorAll(
            'nav a[href*="/chat/"], a[href*="/chat/room/"]'
        );

        for (const link of links) {
            const href = link.getAttribute('href') || '';
            if (!href.includes('/chat/')) continue;
            // Skip non-room links
            if (href === '/chat/' || href.includes('create')) continue;

            const threadUrl = href.startsWith('http')
                ? href
                : 'https://www.reddit.com' + href;

            // Topic/room name from the link's accessible name or text
            const topic = (link.getAttribute('aria-label')
                || link.textContent || '').trim();

            // Last message preview — look for child elements
            // Format: "Username: message text"
            let author = '';
            let preview = '';
            const allText = link.textContent || '';
            // The preview is usually in a nested element
            const spans = link.querySelectorAll('span, div, p');
            for (const s of spans) {
                const t = s.textContent.trim();
                // Match "Username: preview text"
                const m = t.match(/^(\\S+):\\s*(.+)/);
                if (m && m[1].length < 30) {
                    author = m[1];
                    preview = m[2];
                    break;
                }
            }

            // Check for unread badge (aria-label with "unread")
            const hasUnread = link.querySelector(
                '[aria-label*="unread"]'
            ) !== null;

            if (topic.length > 1) {
                results.push({
                    author: author || topic,
                    subject: topic,
                    preview: preview,
                    time: '',
                    thread_url: threadUrl,
                    type: 'chat',
                    has_unread: hasUnread,
                });
            }
        }

        return results;
    }"""

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            # Step 1: traditional private messages on old reddit.
            page.goto(
                "https://old.reddit.com/message/unread/",
                wait_until="domcontentloaded",
            )
            page.wait_for_timeout(3000)
            _ensure_old_reddit(page)
            found = list(page.evaluate(inbox_script))

            # Step 2: chat rooms. The sidebar holds links such as
            # <a href="/chat/room/ID">topic name</a>, each wrapping a
            # "Username: message preview" element for the last message.
            page.goto(
                "https://www.reddit.com/chat",
                wait_until="domcontentloaded",
            )
            page.wait_for_timeout(5000)
            found.extend(page.evaluate(chat_script))

            # Keep only the first entry per (case-insensitive) author; dicts
            # preserve insertion order, so the original ordering survives.
            first_by_author = {}
            for entry in found:
                author_key = entry.get("author", "").lower()
                if author_key:
                    first_by_author.setdefault(author_key, entry)

            return list(first_by_author.values())

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so the
            # page is never closed here: closing it forces the next run to
            # new_page(), which steals OS focus from whatever app is in front.
            # Mirrors twitter_browser (only the persistent-context fallback
            # closes). cleanup_harness_tabs at cycle start bounds tab count to
            # one.
            if not is_cdp:
                try:
                    page.context.close()
                except Exception:
                    pass
                try:
                    browser.close()
                except Exception:
                    pass
|
|
1443
|
+
|
|
1444
|
+
|
|
1445
|
+
def read_conversation(chat_url, max_messages=20):
    """Read the messages of a Reddit chat room or PM thread.

    A URL containing ``/chat`` but not ``message`` is treated as a Reddit
    Chat room: the page is opened as-is and messages are parsed out of
    ``aria-label`` attributes of the form
    "USERNAME said TIME_AGO, MESSAGE_TEXT, N replies, N reactions".
    Any other URL is treated as a traditional PM thread: it is opened on
    old.reddit.com and each ``.message`` element with body text is read.

    Every message is a dict with ``sender``, ``content``, ``time`` and
    ``is_from_us`` (sender equals OUR_USERNAME, case-insensitively).

    Returns: {"partner_name": "...", "messages": [...], "total_found": N}
    where ``messages`` is ``slice(-max_messages)`` of everything found and
    ``total_found`` counts all messages before slicing.
    """
    from playwright.sync_api import sync_playwright

    chat_script = """(params) => {
        const maxMessages = params.maxMessages;
        const ourUsername = params.ourUsername;
        let partnerName = '';
        const messages = [];

        // Get chat room name from the header
        const headerEls = document.querySelectorAll(
            '[aria-label*="Current chat"]'
        );
        for (const h of headerEls) {
            const label = h.getAttribute('aria-label') || '';
            const m = label.match(/Current chat,\\s*(.+)/);
            if (m) { partnerName = m[1]; break; }
        }
        // Fallback: look for header text
        if (!partnerName) {
            const headers = document.querySelectorAll('h1, h2, h3');
            for (const h of headers) {
                const t = h.textContent.trim();
                if (t.length > 1 && t.length < 60 && !t.includes('Chat')
                    && !t.includes('Reddit')) {
                    partnerName = t;
                    break;
                }
            }
        }

        // Find message elements by their accessible name pattern:
        // "USERNAME said TIME, TEXT, N replies, N reactions"
        const allEls = document.querySelectorAll('[aria-label]');
        for (const el of allEls) {
            const label = el.getAttribute('aria-label') || '';
            // Match: "Username said time_ago, message text, N replies"
            const m = label.match(
                /^(\\S+) said (.+?),\\s*(.+?),\\s*\\d+ repl/
            );
            if (!m) continue;

            const sender = m[1];
            const time = m[2];
            let content = m[3];

            // Clean up content (remove trailing ", 0 reactions" etc)
            content = content.replace(/,\\s*\\d+ reactions?$/, '').trim();

            const isFromUs = sender.toLowerCase()
                === ourUsername.toLowerCase();
            if (!isFromUs && sender) {
                partnerName = partnerName || sender;
            }

            messages.push({
                sender: sender,
                content: content,
                time: time,
                is_from_us: isFromUs,
            });
        }

        const recent = messages.slice(-maxMessages);
        return {
            partner_name: partnerName,
            messages: recent,
            total_found: messages.length,
        };
    }"""

    pm_script = """(params) => {
        const maxMessages = params.maxMessages;
        const ourUsername = params.ourUsername;
        let partnerName = '';
        const messages = [];

        const msgEls = document.querySelectorAll('.message');
        for (const msg of msgEls) {
            const authorEl = msg.querySelector('.author');
            const sender = authorEl
                ? authorEl.textContent.trim() : '';

            const bodyEl = msg.querySelector('.md');
            const content = bodyEl
                ? bodyEl.textContent.trim() : '';

            const timeEl = msg.querySelector(
                'time, .live-timestamp'
            );
            const time = timeEl
                ? (timeEl.getAttribute('title')
                    || timeEl.textContent.trim())
                : '';

            const isFromUs = sender.toLowerCase()
                === ourUsername.toLowerCase();

            if (!isFromUs && sender) {
                partnerName = sender;
            }

            if (content) {
                messages.push({
                    sender: sender,
                    content: content,
                    time: time,
                    is_from_us: isFromUs,
                });
            }
        }

        const recent = messages.slice(-maxMessages);
        return {
            partner_name: partnerName,
            messages: recent,
            total_found: messages.length,
        };
    }"""

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            if "/chat" in chat_url and "message" not in chat_url:
                # Reddit Chat is a single-page app on new reddit; give it
                # time to render before reading the aria-labels.
                page.goto(chat_url, wait_until="domcontentloaded")
                page.wait_for_timeout(5000)
                return page.evaluate(
                    chat_script,
                    {"maxMessages": max_messages, "ourUsername": OUR_USERNAME},
                )

            # Everything else is a traditional PM thread on old.reddit.com.
            legacy_url = _to_old_reddit(chat_url)
            page.goto(legacy_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3000)
            _ensure_old_reddit(page)
            return page.evaluate(
                pm_script,
                {"maxMessages": max_messages, "ourUsername": OUR_USERNAME},
            )

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so the
            # page is never closed here: closing it forces the next run to
            # new_page(), which steals OS focus from whatever app is in front.
            # Mirrors twitter_browser (only the persistent-context fallback
            # closes). cleanup_harness_tabs at cycle start bounds tab count to
            # one.
            if not is_cdp:
                try:
                    page.context.close()
                except Exception:
                    pass
                try:
                    browser.close()
                except Exception:
                    pass
|
|
1613
|
+
|
|
1614
|
+
|
|
1615
|
+
def _load_active_reddit_campaigns_for_dm():
    """Best-effort load of active reddit campaigns that carry a suffix.

    Queries ``GET /api/v1/campaigns`` through ``http_api.api_get`` and
    returns ``[(id, suffix, sample_rate), ...]``. ``sample_rate`` defaults to
    1.0 when the row has none. On any failure (no API reachable, transient
    error, etc.) returns ``[]``; this keeps reddit_browser.py usable when the
    website route is down without blocking the DM send.

    Malformed rows are skipped individually instead of discarding the whole
    load: rows that are not mappings, lack ``id``/``suffix``, have a
    non-numeric ``id``/``sample_rate``, or whose suffix is not a non-empty
    string (callers concatenate the suffix onto the message text).

    Migrated 2026-05-12 from direct SQL (campaigns table) to the
    /api/v1/campaigns route. The route's status/platform/has_suffix/
    with_budget_remaining filter set is an exact match for the legacy
    SELECT clauses (status='active', platforms ILIKE '%,reddit,%',
    suffix NOT NULL/empty, posts_made < max_posts_total).

    Returns:
        list[tuple[int, str, float]]: possibly empty.
    """
    try:
        # Make the sibling http_api module importable. Only insert when this
        # directory is not already first, so repeated calls do not keep
        # growing sys.path while import precedence stays the same.
        here = os.path.dirname(os.path.abspath(__file__))
        if sys.path[:1] != [here]:
            sys.path.insert(0, here)
        from http_api import api_get
        resp = api_get(
            "/api/v1/campaigns",
            query={
                "status": "active",
                "platform": "reddit",
                "has_suffix": "true",
                "with_budget_remaining": "true",
                "limit": 500,
            },
        )
        data = (resp or {}).get("data") or {}
        rows = data.get("campaigns") or []
        out = []
        for r in rows:
            try:
                suffix = r["suffix"]
                if not isinstance(suffix, str) or not suffix:
                    # A missing/non-text suffix cannot be appended to a DM.
                    continue
                raw_rate = r.get("sample_rate")
                sample_rate = 1.0 if raw_rate is None else float(raw_rate)
                out.append((int(r["id"]), suffix, sample_rate))
            except (AttributeError, TypeError, ValueError, KeyError):
                # Skip malformed rows rather than blowing up the entire load.
                continue
        return out
    except Exception:
        # Deliberate best-effort: campaign loading must never block a send.
        return []
|
|
1653
|
+
|
|
1654
|
+
|
|
1655
|
+
def _log_dm_outbound(chat_url, content, dm_id=None, minted_codes=None):
    """Log a sent DM through ``dm_conversation.py log-outbound``.

    After a successful send, logging via the canonical CLI lets the suffix-
    detection path attribute the message to the active campaign.

    Args:
        chat_url: Conversation URL; only used to look up the dms row when
            ``dm_id`` is not given.
        content: Exact text that was sent.
        dm_id: Id of the dms row (preferred). When ``None`` the most recent
            row is looked up by ``chat_url``. Many production rows have an
            empty ``dms.chat_url``, so the dm_id path is the reliable one.
        minted_codes: dm_links codes minted for this outbound's URLs; passed
            to the CLI through the ``WRAP_MINTED_CODES`` environment variable
            (comma-separated) so log-outbound can backfill
            dm_links.message_id after RETURNING id.

    Returns:
        True if log-outbound was invoked (even if the CLI exited non-zero,
        which is reported on stderr); False when the dm id could not be
        resolved or the invocation itself raised.
    """
    try:
        here = os.path.dirname(os.path.abspath(__file__))
        if dm_id is None:
            # chat_url -> dms.id lookup. Migrated 2026-05-12 from direct
            # SQL to GET /api/v1/dms?platform=reddit&chat_url=<url>&limit=1.
            # The route filters by exact chat_url and orders by
            # discovered_at DESC, which (like the legacy id DESC) picks
            # the most recent row when duplicates exist.
            # Only insert when not already first so repeated calls do not
            # keep growing sys.path.
            if sys.path[:1] != [here]:
                sys.path.insert(0, here)
            try:
                from http_api import api_get
                resp = api_get(
                    "/api/v1/dms",
                    query={"platform": "reddit", "chat_url": chat_url, "limit": 1},
                )
                rows = ((resp or {}).get("data") or {}).get("dms") or []
            except Exception as e:
                print(f"[reddit_browser] log-outbound chat_url lookup failed: {e}",
                      file=sys.stderr)
                return False
            if not rows:
                print("[reddit_browser] log-outbound skipped: no dm_id and chat_url lookup miss",
                      file=sys.stderr)
                return False
            dm_id = rows[0].get("id")
            if not dm_id:
                print("[reddit_browser] log-outbound skipped: API row missing id",
                      file=sys.stderr)
                return False
        env = os.environ.copy()
        if minted_codes:
            # str() guards against non-string codes aborting the whole log.
            env["WRAP_MINTED_CODES"] = ",".join(str(code) for code in minted_codes)
        proc = subprocess.run(
            ["python3", os.path.join(here, "dm_conversation.py"),
             "log-outbound", "--dm-id", str(dm_id), "--content", content, "--verified"],
            capture_output=True, text=True, timeout=20, env=env,
        )
        if proc.returncode != 0:
            # Output is captured, so surface failures instead of silently
            # dropping them; the return value still means "was invoked".
            detail = (proc.stderr or "").strip()[:500]
            print(f"[reddit_browser] log-outbound exited with code {proc.returncode}: {detail}",
                  file=sys.stderr)
        return True
    except Exception as e:
        print(f"[reddit_browser] internal log-outbound failed: {e}", file=sys.stderr)
        return False
|
|
1705
|
+
|
|
1706
|
+
|
|
1707
|
+
def send_dm(chat_url, message, dm_id=None):
|
|
1708
|
+
"""Send a message in a Reddit chat or PM thread.
|
|
1709
|
+
|
|
1710
|
+
For chat URLs (reddit.com/chat/...), navigates to the chat room and
|
|
1711
|
+
types/sends the message. For PM URLs, uses old.reddit.com message compose.
|
|
1712
|
+
|
|
1713
|
+
Active Reddit campaigns with a `suffix` are applied at this tool layer:
|
|
1714
|
+
the suffix is appended to `message` (per `sample_rate` coin flip per
|
|
1715
|
+
campaign) before typing, so the literal text is guaranteed to be
|
|
1716
|
+
delivered. After a verified send, logs via dm_conversation.py log-outbound
|
|
1717
|
+
so the campaign counter advances automatically (the CLI auto-detects the
|
|
1718
|
+
suffix in stored content).
|
|
1719
|
+
|
|
1720
|
+
`dm_id` (optional) is preferred over chat_url for the post-send log; many
|
|
1721
|
+
rows have empty `dms.chat_url` so the chat_url lookup misses.
|
|
1722
|
+
|
|
1723
|
+
Returns: {"ok": true, "thread_url": "...", "message_sent": "...",
|
|
1724
|
+
"applied_campaigns": [...]} or {"ok": false, "error": "..."}
|
|
1725
|
+
"""
|
|
1726
|
+
from playwright.sync_api import sync_playwright
|
|
1727
|
+
|
|
1728
|
+
# Tool-level URL wrap pass: every URL in the model's message gets minted
|
|
1729
|
+
# through dm_short_links.wrap_text so clicks attribute to this DM. Runs
|
|
1730
|
+
# BEFORE campaign-suffix injection. Refuses if any URL points at a project
|
|
1731
|
+
# not in dms.target_projects[]; the pipeline must set-target-project
|
|
1732
|
+
# --append before retrying.
|
|
1733
|
+
minted_link_codes = []
|
|
1734
|
+
if dm_id is not None:
|
|
1735
|
+
from dm_short_links import wrap_text as _wrap_text
|
|
1736
|
+
wrap_res = _wrap_text(dm_id=dm_id, text=message)
|
|
1737
|
+
if not wrap_res.get("ok"):
|
|
1738
|
+
return {
|
|
1739
|
+
"ok": False,
|
|
1740
|
+
"error": "link_wrap_failed",
|
|
1741
|
+
"wrap_error": wrap_res.get("error"),
|
|
1742
|
+
"needed_project": wrap_res.get("needed_project"),
|
|
1743
|
+
"url": wrap_res.get("url"),
|
|
1744
|
+
}
|
|
1745
|
+
message = wrap_res["text"]
|
|
1746
|
+
minted_link_codes = wrap_res.get("minted_codes", [])
|
|
1747
|
+
|
|
1748
|
+
# Tool-level campaign suffix injection (guaranteed delivery of literal text).
|
|
1749
|
+
applied_campaigns = []
|
|
1750
|
+
for cid, suffix, sample_rate in _load_active_reddit_campaigns_for_dm():
|
|
1751
|
+
if random.random() < sample_rate:
|
|
1752
|
+
message = message + suffix
|
|
1753
|
+
applied_campaigns.append(cid)
|
|
1754
|
+
_diag_msg = f"[send_dm] applied_campaigns={applied_campaigns} minted_links={minted_link_codes} message_len={len(message)} dm_id={dm_id}"
|
|
1755
|
+
print(_diag_msg, file=sys.stderr)
|
|
1756
|
+
_diag_log(_diag_msg)
|
|
1757
|
+
|
|
1758
|
+
with sync_playwright() as p:
|
|
1759
|
+
browser, page, is_cdp = get_browser_and_page(p)
|
|
1760
|
+
|
|
1761
|
+
try:
|
|
1762
|
+
is_chat = "/chat" in chat_url and "message" not in chat_url
|
|
1763
|
+
|
|
1764
|
+
if is_chat:
|
|
1765
|
+
# Reddit Chat (SPA)
|
|
1766
|
+
page.goto(chat_url, wait_until="domcontentloaded")
|
|
1767
|
+
page.wait_for_timeout(5000)
|
|
1768
|
+
|
|
1769
|
+
# Reddit Chat uses a textbox with placeholder "Message"
|
|
1770
|
+
msg_box = page.get_by_role("textbox", name="Write message")
|
|
1771
|
+
try:
|
|
1772
|
+
msg_box.wait_for(state="visible", timeout=10000)
|
|
1773
|
+
except Exception:
|
|
1774
|
+
# Fallback selectors
|
|
1775
|
+
msg_box = None
|
|
1776
|
+
for selector in [
|
|
1777
|
+
'textarea[placeholder*="Message"]',
|
|
1778
|
+
'[role="textbox"]',
|
|
1779
|
+
'div[contenteditable="true"]',
|
|
1780
|
+
]:
|
|
1781
|
+
try:
|
|
1782
|
+
el = page.locator(selector).last
|
|
1783
|
+
if el.is_visible():
|
|
1784
|
+
msg_box = el
|
|
1785
|
+
break
|
|
1786
|
+
except Exception:
|
|
1787
|
+
continue
|
|
1788
|
+
|
|
1789
|
+
if not msg_box:
|
|
1790
|
+
return {"ok": False, "error": "chat_input_not_found"}
|
|
1791
|
+
|
|
1792
|
+
# Check if textbox is disabled (no chat selected)
|
|
1793
|
+
is_disabled = msg_box.evaluate(
|
|
1794
|
+
"el => el.disabled || el.getAttribute('aria-disabled') === 'true'"
|
|
1795
|
+
)
|
|
1796
|
+
if is_disabled:
|
|
1797
|
+
return {"ok": False, "error": "chat_input_disabled_no_chat_selected"}
|
|
1798
|
+
|
|
1799
|
+
# Click and type the message
|
|
1800
|
+
msg_box.click()
|
|
1801
|
+
page.wait_for_timeout(500)
|
|
1802
|
+
|
|
1803
|
+
# Use keyboard.type for contenteditable, fill for textarea
|
|
1804
|
+
tag = msg_box.evaluate("el => el.tagName.toLowerCase()")
|
|
1805
|
+
if tag == "textarea":
|
|
1806
|
+
msg_box.fill(message)
|
|
1807
|
+
else:
|
|
1808
|
+
page.keyboard.type(message, delay=10)
|
|
1809
|
+
|
|
1810
|
+
page.wait_for_timeout(1000)
|
|
1811
|
+
|
|
1812
|
+
# Send via Enter key (Reddit Chat sends on Enter)
|
|
1813
|
+
page.keyboard.press("Enter")
|
|
1814
|
+
page.wait_for_timeout(3000)
|
|
1815
|
+
|
|
1816
|
+
# Verify message appeared in aria-labels
|
|
1817
|
+
msg_start = message[:50]
|
|
1818
|
+
verified = page.evaluate("""(msgStart) => {
|
|
1819
|
+
const body = document.body.textContent || '';
|
|
1820
|
+
return body.includes(msgStart);
|
|
1821
|
+
}""", msg_start)
|
|
1822
|
+
|
|
1823
|
+
if verified:
|
|
1824
|
+
_log_dm_outbound(chat_url, message, dm_id=dm_id, minted_codes=minted_link_codes)
|
|
1825
|
+
|
|
1826
|
+
return {
|
|
1827
|
+
"ok": verified,
|
|
1828
|
+
"thread_url": page.url,
|
|
1829
|
+
"verified": verified,
|
|
1830
|
+
"message_sent": message,
|
|
1831
|
+
"applied_campaigns": applied_campaigns,
|
|
1832
|
+
"minted_link_codes": minted_link_codes,
|
|
1833
|
+
"error": None if verified else "send_unverified_no_dom_confirmation",
|
|
1834
|
+
}
|
|
1835
|
+
|
|
1836
|
+
else:
|
|
1837
|
+
# Traditional PM reply on old.reddit.com
|
|
1838
|
+
old_url = _to_old_reddit(chat_url)
|
|
1839
|
+
page.goto(old_url, wait_until="domcontentloaded")
|
|
1840
|
+
page.wait_for_timeout(3000)
|
|
1841
|
+
_ensure_old_reddit(page)
|
|
1842
|
+
|
|
1843
|
+
# Find the reply textarea in the PM thread
|
|
1844
|
+
reply_box = page.locator(
|
|
1845
|
+
".usertext-edit textarea, textarea[name='text']"
|
|
1846
|
+
).last
|
|
1847
|
+
|
|
1848
|
+
try:
|
|
1849
|
+
reply_box.wait_for(state="visible", timeout=5000)
|
|
1850
|
+
except Exception:
|
|
1851
|
+
return {"ok": False, "error": "pm_reply_box_not_found"}
|
|
1852
|
+
|
|
1853
|
+
reply_box.fill(message)
|
|
1854
|
+
page.wait_for_timeout(1000)
|
|
1855
|
+
|
|
1856
|
+
# Click save/submit
|
|
1857
|
+
save_btn = page.locator(
|
|
1858
|
+
"button[type='submit']:has-text('save'), "
|
|
1859
|
+
"button[type='submit']"
|
|
1860
|
+
).last
|
|
1861
|
+
|
|
1862
|
+
try:
|
|
1863
|
+
save_btn.click()
|
|
1864
|
+
except Exception:
|
|
1865
|
+
return {"ok": False, "error": "pm_save_button_not_found"}
|
|
1866
|
+
|
|
1867
|
+
page.wait_for_timeout(4000)
|
|
1868
|
+
|
|
1869
|
+
_log_dm_outbound(chat_url, message, dm_id=dm_id, minted_codes=minted_link_codes)
|
|
1870
|
+
|
|
1871
|
+
return {
|
|
1872
|
+
"ok": True,
|
|
1873
|
+
"thread_url": page.url,
|
|
1874
|
+
"verified": True,
|
|
1875
|
+
"message_sent": message,
|
|
1876
|
+
"applied_campaigns": applied_campaigns,
|
|
1877
|
+
"minted_link_codes": minted_link_codes,
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
finally:
|
|
1881
|
+
# Harness (is_cdp) tabs are REUSED across invocations, so never close
|
|
1882
|
+
# the page here: closing it forces the next run to new_page(), which
|
|
1883
|
+
# steals OS focus from whatever app is in front. Mirrors twitter_browser
|
|
1884
|
+
# (only the persistent-context fallback closes). cleanup_harness_tabs at
|
|
1885
|
+
# cycle start bounds tab count to one.
|
|
1886
|
+
try:
|
|
1887
|
+
if not is_cdp:
|
|
1888
|
+
page.context.close()
|
|
1889
|
+
except Exception:
|
|
1890
|
+
pass
|
|
1891
|
+
if not is_cdp:
|
|
1892
|
+
try:
|
|
1893
|
+
browser.close()
|
|
1894
|
+
except Exception:
|
|
1895
|
+
pass
|
|
1896
|
+
|
|
1897
|
+
|
|
1898
|
+
def _compose_dm_via_chat(page, subject, body):
    """Send the message through the Reddit chat UI that the compose URL redirected to.

    The chat UI has no subject field, so a non-empty ``subject`` is prepended
    to ``body`` separated by a blank line.

    Returns a dict with ``ok``/``verified`` (True only when the first 50
    characters of the message are found in the page text after sending),
    ``thread_url`` and ``error``; or ``{"ok": False, "error":
    "chat_input_not_found"}`` when no visible input is located.
    """
    page.wait_for_timeout(3000)

    # Probe candidate inputs from most to least specific; take the last match
    # of the first selector that is currently visible.
    msg_box = None
    for selector in ('textarea', 'div[contenteditable="true"]', '[role="textbox"]'):
        try:
            candidate = page.locator(selector).last
            if candidate.is_visible():
                msg_box = candidate
                break
        except Exception:
            continue

    if msg_box is None:
        return {"ok": False, "error": "chat_input_not_found"}

    full_msg = f"{subject}\n\n{body}" if subject else body
    msg_box.click()
    page.wait_for_timeout(500)

    # fill() is used for a real <textarea>; anything else (contenteditable,
    # role=textbox) receives simulated keystrokes into the focused element.
    tag = msg_box.evaluate("el => el.tagName.toLowerCase()")
    if tag == "textarea":
        msg_box.fill(full_msg)
    else:
        page.keyboard.type(full_msg, delay=10)

    page.wait_for_timeout(1000)

    # Prefer an explicit Send button; fall back to pressing Enter.
    try:
        send_btn = page.locator(
            'button[aria-label*="Send"], '
            'button:has-text("Send")'
        ).first
        if send_btn.is_visible():
            send_btn.click()
        else:
            page.keyboard.press("Enter")
    except Exception:
        page.keyboard.press("Enter")

    page.wait_for_timeout(3000)

    # Confirm the send by looking for the start of the message in the page text.
    msg_start = full_msg[:50]
    verified = page.evaluate("""(msgStart) => {
        const body = document.body.textContent || '';
        return body.includes(msgStart);
    }""", msg_start)

    return {
        "ok": verified,
        "thread_url": page.url,
        "verified": verified,
        "error": None if verified else "compose_unverified_no_dom_confirmation",
    }


def _compose_dm_via_old_reddit(page, subject, body):
    """Fill and submit the old.reddit.com compose form.

    Returns ``{"ok": True, "thread_url": ...}`` unless a field or the submit
    button cannot be used, or a visible ``.error`` element is present after
    submitting (its text is returned as ``error``).
    """
    _ensure_old_reddit(page)

    subject_input = page.locator('input[name="subject"]').first
    try:
        subject_input.wait_for(state="visible", timeout=3000)
        subject_input.fill(subject)
    except Exception:
        return {"ok": False, "error": "subject_field_not_found"}

    body_input = page.locator('textarea[name="text"]').first
    try:
        body_input.wait_for(state="visible", timeout=3000)
        body_input.fill(body)
    except Exception:
        return {"ok": False, "error": "body_field_not_found"}

    page.wait_for_timeout(1000)

    submit_btn = page.locator('button[type="submit"]').first
    try:
        submit_btn.click()
    except Exception:
        return {"ok": False, "error": "submit_button_not_found"}

    page.wait_for_timeout(4000)

    # Look for a displayed error BEFORE declaring success. The compose URL
    # itself contains "message", so a URL-substring success test placed first
    # always passes and hides every form error.
    try:
        error_el = page.locator(".error:visible").first
        if error_el.is_visible():
            return {
                "ok": False,
                "error": (error_el.text_content() or ""),
            }
    except Exception:
        pass

    return {"ok": True, "thread_url": page.url}


def _compose_dm_via_new_reddit(page, recipient, subject, body):
    """Fill and submit the www.reddit.com compose form built from web components.

    The form fields are ``faceplate-text-input`` / ``faceplate-textarea-input``
    custom elements whose real inputs live inside shadow roots, so values are
    set from page-side JavaScript.

    Returns ``{"ok": True, "thread_url": ...}`` on confirmed or presumed
    success, otherwise ``{"ok": False, "error": ...}``.
    """
    page.wait_for_timeout(4000)

    fill_result = page.evaluate("""(args) => {
        const {recipient, subject, body} = args;

        // Helper: find real input inside shadow root
        function findShadowInput(host) {
            if (!host || !host.shadowRoot) return null;
            return host.shadowRoot.querySelector('input, textarea');
        }

        // Helper: set value with native setter + events
        function setVal(el, value) {
            const proto = el.tagName === 'TEXTAREA'
                ? HTMLTextAreaElement.prototype
                : HTMLInputElement.prototype;
            const setter = Object.getOwnPropertyDescriptor(proto, 'value').set;
            setter.call(el, value);
            el.dispatchEvent(new Event('input', { bubbles: true, composed: true }));
            el.dispatchEvent(new Event('change', { bubbles: true, composed: true }));
        }

        // Deep search through shadow roots
        function deepQuery(root, selector) {
            let result = root.querySelector(selector);
            if (result) return result;
            const all = root.querySelectorAll('*');
            for (const el of all) {
                if (el.shadowRoot) {
                    result = deepQuery(el.shadowRoot, selector);
                    if (result) return result;
                }
            }
            return null;
        }

        // Find the faceplate custom elements (may be in shadow DOM)
        const recipientHost = deepQuery(document, 'faceplate-text-input[name="message-recipient-input"]');
        const titleHost = deepQuery(document, 'faceplate-text-input[name="message-title"]');
        const messageHost = deepQuery(document, 'faceplate-textarea-input[name="message-content"]');

        if (!recipientHost || !titleHost || !messageHost) {
            // Debug: check what's on the page
            const url = window.location.href;
            const title_text = document.title;
            const bodyText = (document.body ? document.body.textContent : '').substring(0, 500);
            return {ok: false, error: 'faceplate_elements_not_found',
                    found: {recipient: !!recipientHost, title: !!titleHost, message: !!messageHost},
                    debug: {url, title_text, bodyText}};
        }

        const recipientInput = findShadowInput(recipientHost);
        const titleInput = findShadowInput(titleHost);
        const messageInput = findShadowInput(messageHost);

        if (!recipientInput || !titleInput || !messageInput) {
            return {ok: false, error: 'shadow_inputs_not_found',
                    found: {recipient: !!recipientInput, title: !!titleInput, message: !!messageInput}};
        }

        // Fill recipient if needed
        if (!recipientInput.value || recipientInput.value.trim() !== recipient) {
            setVal(recipientInput, recipient);
            recipientHost.setAttribute('value', recipient);
        }

        // Fill title
        setVal(titleInput, subject);
        titleHost.setAttribute('value', subject);

        // Fill message
        setVal(messageInput, body);
        messageHost.setAttribute('value', body);

        return {ok: true};
    }""", {"recipient": recipient, "subject": subject, "body": body})

    if not fill_result.get("ok"):
        return {"ok": False, "error": fill_result.get("error", "js_fill_failed")}

    page.wait_for_timeout(1500)

    # Click the first enabled button labelled exactly "send"; if only a
    # disabled one exists it is clicked anyway and flagged with was_disabled.
    send_clicked = page.evaluate("""() => {
        // Search in shadow roots too
        function findButtons(root) {
            const btns = [];
            root.querySelectorAll('button').forEach(b => btns.push(b));
            root.querySelectorAll('*').forEach(el => {
                if (el.shadowRoot) {
                    el.shadowRoot.querySelectorAll('button').forEach(b => btns.push(b));
                }
            });
            return btns;
        }
        const buttons = findButtons(document);
        for (const btn of buttons) {
            const text = (btn.textContent || '').trim().toLowerCase();
            if (text === 'send' && !btn.disabled) {
                btn.click();
                return {ok: true};
            }
        }
        for (const btn of buttons) {
            const text = (btn.textContent || '').trim().toLowerCase();
            if (text === 'send') {
                btn.click();
                return {ok: true, was_disabled: true};
            }
        }
        return {ok: false, error: 'send_button_not_found'};
    }""")

    if not send_clicked.get("ok"):
        return {"ok": False, "error": "send_button_not_found"}

    page.wait_for_timeout(4000)

    # Positive confirmation banner.
    try:
        page_text = page.text_content("body") or ""
        if "Message sent" in page_text:
            return {"ok": True, "thread_url": page.url}
    except Exception:
        pass

    # Explicit error surfaced by the page.
    try:
        error_el = page.locator('[role="alert"]').first
        if error_el.is_visible():
            return {
                "ok": False,
                "error": (error_el.text_content() or ""),
            }
    except Exception:
        pass

    # click() on a disabled button does nothing, so with no confirmation
    # banner there is no basis for reporting the message as sent.
    if send_clicked.get("was_disabled"):
        return {"ok": False, "error": "send_button_disabled"}

    # No confirmation and no error: keep the best-effort assumption of success.
    return {"ok": True, "thread_url": page.url}


def compose_dm(recipient, subject, body):
    """Compose and send a new Reddit DM/chat to a user.

    Opens ``https://www.reddit.com/message/compose/?to=<recipient>`` and then
    handles whichever page the browser lands on:

    * a chat page (URL contains "chat" but not "message/compose"),
    * the old.reddit.com compose form,
    * the www.reddit.com compose form.

    Args:
        recipient: Username to message; percent-encoded before being placed
            in the compose URL.
        subject: Message subject. On the chat page it is prepended to the body.
        body: Message text.

    Returns:
        ``{"ok": True, "thread_url": ...}`` on success or
        ``{"ok": False, "error": "..."}`` on failure. The chat path also
        includes a ``verified`` flag.

    Exceptions raised by navigation are not caught here; the browser cleanup
    in ``finally`` still runs.
    """
    from urllib.parse import quote

    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            # Use new reddit compose page directly (old reddit often redirects).
            # The recipient is escaped so characters such as "&" or "#" cannot
            # alter the query string.
            compose_url = (
                "https://www.reddit.com/message/compose/?to="
                + quote(str(recipient), safe="")
            )
            page.goto(compose_url, wait_until="domcontentloaded")
            page.wait_for_timeout(3000)

            if "chat" in page.url and "message/compose" not in page.url:
                return _compose_dm_via_chat(page, subject, body)
            if "old.reddit.com" in page.url:
                return _compose_dm_via_old_reddit(page, subject, body)
            return _compose_dm_via_new_reddit(page, recipient, subject, body)

        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so never close
            # the page here: closing it forces the next run to new_page(), which
            # steals OS focus from whatever app is in front. Mirrors twitter_browser
            # (only the persistent-context fallback closes). cleanup_harness_tabs at
            # cycle start bounds tab count to one.
            try:
                if not is_cdp:
                    page.context.close()
            except Exception:
                pass
            if not is_cdp:
                try:
                    browser.close()
                except Exception:
                    pass
|
|
2203
|
+
|
|
2204
|
+
|
|
2205
|
+
def scrape_views(username, max_scrolls=300):
    """Scrape view counts, scores and comment counts from a user's profile pages.

    Visits four profile listings (comments sorted by top/new, submissions
    sorted by top/new), scrolling each one to load more rows, and merges what
    is extracted from every ``<article>`` keyed by permalink.

    Args:
        username: Reddit username; percent-encoded before being placed in the
            profile URLs.
        max_scrolls: Total scroll budget, divided evenly (integer division)
            across the profile listings.

    Returns:
        On success::

            {"ok": True, "total": N, "with_views": N, "with_score": N,
             "with_comments_count": N,
             "results": [{"url", "views", "score", "comments_count"}, ...]}

        On any exception raised while scraping:
        ``{"ok": False, "error": "<message>"}``.
    """
    from urllib.parse import quote

    from playwright.sync_api import sync_playwright

    user = quote(str(username), safe="")
    profile_urls = [
        f"https://www.reddit.com/user/{user}/comments/?sort=top",
        f"https://www.reddit.com/user/{user}/comments/?sort=new",
        f"https://www.reddit.com/user/{user}/submitted/?sort=top&t=all",
        f"https://www.reddit.com/user/{user}/submitted/?sort=new",
    ]

    # Extract per-article: url (permalink), views (via visible text scan),
    # score + comment-count. Sources:
    #   Thread rows:  <shreddit-post> SSR attrs -> score + comment-count
    #   Comment rows: <shreddit-comment-action-row> nested in
    #                 <shreddit-profile-comment> -> score (no reply count)
    extract_js = """() => {
        const results = [];
        document.querySelectorAll("article").forEach(article => {
            const post = article.querySelector("shreddit-post");
            let url = null;
            let score = null;
            let commentsCount = null;
            if (post) {
                const permalink = post.getAttribute("permalink");
                if (permalink) url = permalink;
                const s = post.getAttribute("score");
                if (s !== null && s !== "") {
                    const n = parseInt(s, 10);
                    if (!Number.isNaN(n)) score = n;
                }
                const cc = post.getAttribute("comment-count");
                if (cc !== null && cc !== "") {
                    const n = parseInt(cc, 10);
                    if (!Number.isNaN(n)) commentsCount = n;
                }
            } else {
                const row = article.querySelector("shreddit-comment-action-row");
                if (row) {
                    const permalink = row.getAttribute("permalink");
                    if (permalink) url = permalink;
                    const s = row.getAttribute("score");
                    if (s !== null && s !== "") {
                        const n = parseInt(s, 10);
                        if (!Number.isNaN(n)) score = n;
                    }
                }
            }
            if (!url) {
                const links = article.querySelectorAll('a[href*="/comments/"]');
                for (const link of links) {
                    const href = link.getAttribute("href");
                    if (href && href.includes("/comments/")) {
                        if (!url || href.includes("/comment/")) url = href;
                    }
                }
            }
            let views = null;
            for (const el of article.querySelectorAll("*")) {
                const text = el.textContent.trim();
                const match = text.match(/^([\\d,.]+)\\s*([KkMm])?\\s+views?$/);
                if (match) {
                    let v = parseFloat(match[1].replace(/,/g, ""));
                    if (match[2] && match[2].toLowerCase() === "k") v *= 1000;
                    if (match[2] && match[2].toLowerCase() === "m") v *= 1000000;
                    views = Math.round(v);
                    break;
                }
            }
            if (url) {
                results.push({
                    url: url.startsWith("http") ? url : "https://www.reddit.com" + url,
                    views: views,
                    score: score,
                    comments_count: commentsCount,
                });
            }
        });
        return results;
    }"""

    # url -> {views, score, comments_count} — keep non-null values across pages
    all_results = {}
    metric_keys = ("views", "score", "comments_count")

    def merge_items(items):
        """Fold one extraction pass into all_results without losing known values."""
        for item in items:
            known = all_results.get(item["url"])
            if known is None:
                all_results[item["url"]] = {k: item.get(k) for k in metric_keys}
                continue
            # Keep non-null values across repeated sightings.
            for k in metric_keys:
                value = item.get(k)
                if value is not None:
                    known[k] = value

    with sync_playwright() as p:
        browser, page, is_cdp = get_browser_and_page(p)

        try:
            # Split the scroll budget evenly across however many listings there are.
            per_page_max = max_scrolls // len(profile_urls)

            for page_url in profile_urls:
                page.goto(page_url, wait_until="domcontentloaded")
                page.wait_for_timeout(3000)

                merge_items(page.evaluate(extract_js))

                # Scroll to load more; stop after 4 consecutive passes with an
                # unchanged page height or when the per-page budget is spent.
                prev_height = 0
                same_count = 0
                scroll_count = 0

                while same_count < 4 and scroll_count < per_page_max:
                    cur_height = page.evaluate("document.body.scrollHeight")
                    page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                    page.wait_for_timeout(2000)

                    merge_items(page.evaluate(extract_js))

                    if cur_height == prev_height:
                        same_count += 1
                    else:
                        same_count = 0
                    prev_height = cur_height
                    scroll_count += 1

            results_list = [
                {"url": url, "views": d.get("views"),
                 "score": d.get("score"), "comments_count": d.get("comments_count")}
                for url, d in all_results.items()
            ]
            with_views = sum(1 for d in all_results.values() if d.get("views") is not None)
            with_score = sum(1 for d in all_results.values() if d.get("score") is not None)
            with_cc = sum(1 for d in all_results.values() if d.get("comments_count") is not None)

            return {
                "ok": True,
                "total": len(results_list),
                "with_views": with_views,
                "with_score": with_score,
                "with_comments_count": with_cc,
                "results": results_list,
            }

        except Exception as e:
            return {"ok": False, "error": str(e)}
        finally:
            # Harness (is_cdp) tabs are REUSED across invocations, so never close
            # the page here: closing it forces the next run to new_page(), which
            # steals OS focus from whatever app is in front. Mirrors twitter_browser
            # (only the persistent-context fallback closes). cleanup_harness_tabs at
            # cycle start bounds tab count to one.
            try:
                if not is_cdp:
                    page.context.close()
            except Exception:
                pass
            if not is_cdp:
                try:
                    browser.close()
                except Exception:
                    pass
|
|
2378
|
+
|
|
2379
|
+
|
|
2380
|
+
def main():
    """Dispatch the command-line sub-command named in ``sys.argv[1]``.

    Each sub-command validates its argument count, calls the matching
    function and prints the result as indented JSON on stdout. Missing
    arguments, an unknown command, or a non-integer ``max_scrolls`` print a
    message and exit with status 1.
    """
    argv = sys.argv

    def require_args(minimum, usage):
        """Exit with the usage line on stderr when fewer than ``minimum`` argv entries exist."""
        if len(argv) < minimum:
            print(usage, file=sys.stderr)
            sys.exit(1)

    def optional_dm_id(label):
        """Return argv[4] as an int, or None when it is absent, blank or not an integer."""
        if len(argv) < 5 or not argv[4].strip():
            return None
        try:
            return int(argv[4])
        except ValueError:
            print(f"[{label}] ignoring non-int dm_id arg: {argv[4]!r}", file=sys.stderr)
            return None

    def emit(result):
        """Print a result dict as indented JSON on stdout."""
        print(json.dumps(result, indent=2))

    if len(argv) < 2:
        print(__doc__)
        sys.exit(1)

    cmd = argv[1]

    if cmd == "post-comment":
        require_args(4, "Usage: reddit_browser.py post-comment <thread_url> <text>")
        result = post_comment(argv[2], argv[3])
        emit(result)
        # Also echo the compact result on stderr; never fail the run over it.
        try:
            print(f"[reddit_browser] result={json.dumps(result)}", file=sys.stderr)
        except Exception:
            pass

    elif cmd == "reply":
        require_args(4, "Usage: reddit_browser.py reply <comment_permalink> <text> [dm_id]")
        emit(reply_to_comment(argv[2], argv[3], dm_id=optional_dm_id("reply")))

    elif cmd == "edit":
        require_args(4, "Usage: reddit_browser.py edit <comment_permalink> <new_text>")
        emit(edit_comment(argv[2], argv[3]))

    elif cmd == "edit-thread":
        require_args(4, "Usage: reddit_browser.py edit-thread <thread_permalink> <new_body>")
        emit(edit_thread(argv[2], argv[3]))

    elif cmd == "unread-dms":
        emit(unread_dms())

    elif cmd == "read-conversation":
        require_args(3, "Usage: reddit_browser.py read-conversation <chat_url>")
        emit(read_conversation(argv[2]))

    elif cmd == "send-dm":
        require_args(4, "Usage: reddit_browser.py send-dm <chat_url> <message> [dm_id]")
        emit(send_dm(argv[2], argv[3], dm_id=optional_dm_id("send-dm")))

    elif cmd == "compose-dm":
        require_args(5, "Usage: reddit_browser.py compose-dm <recipient> <subject> <body>")
        emit(compose_dm(argv[2], argv[3], argv[4]))

    elif cmd == "scrape-views":
        require_args(3, "Usage: reddit_browser.py scrape-views <username> [max_scrolls]")
        max_scrolls = 300
        if len(argv) > 3:
            # Reject a bad value with a clear message instead of an uncaught
            # ValueError traceback; the exit status stays 1.
            try:
                max_scrolls = int(argv[3])
            except ValueError:
                print(
                    f"[scrape-views] max_scrolls must be an integer, got {argv[3]!r}",
                    file=sys.stderr,
                )
                sys.exit(1)
        emit(scrape_views(argv[2], max_scrolls))

    else:
        print(f"Unknown command: {cmd}", file=sys.stderr)
        print(__doc__)
        sys.exit(1)
|
|
2492
|
+
|
|
2493
|
+
|
|
2494
|
+
def _install_stderr_tee():
    """Duplicate everything written to stderr into <repo>/log/reddit_browser.<utc>.<pid>.err.

    The parent post_reddit.py runs each reddit_browser.py invocation with
    stderr=PIPE and, when the child raises before emitting JSON on stdout,
    shows only a truncated slice of that stderr (historically 200 chars).
    Writing a copy to disk here preserves the full traceback no matter how
    the parent handles the pipe.

    Any failure while setting this up is swallowed so the invocation proceeds
    with the untouched stderr.
    """
    try:
        script_path = os.path.abspath(__file__)
        log_dir = os.path.join(os.path.dirname(os.path.dirname(script_path)), "log")
        os.makedirs(log_dir, exist_ok=True)

        ts = time.strftime("%Y%m%d-%H%M%S", time.gmtime())
        pid = os.getpid()
        # Line-buffered append so partial output survives an abrupt exit.
        fh = open(
            os.path.join(log_dir, f"reddit_browser.{ts}.{pid}.err"),
            "a",
            encoding="utf-8",
            buffering=1,
        )

        # Header records only the command and its first argument, capped at 160 chars.
        if len(sys.argv) > 1:
            cmd_repr = " ".join(sys.argv[1:3])[:160]
        else:
            cmd_repr = "(no cmd)"
        fh.write(f"--- reddit_browser invocation pid={pid} cmd={cmd_repr!r} ts={ts} ---\n")
        fh.flush()

        real_stderr = sys.stderr

        def _best_effort(action, *args):
            """Run a log-file operation, ignoring any error it raises."""
            try:
                action(*args)
            except Exception:
                pass

        class _Tee:
            """File-like wrapper: copy to the log file, then defer to the real stderr."""

            def write(self, s):
                _best_effort(fh.write, s)
                return real_stderr.write(s)

            def flush(self):
                _best_effort(fh.flush)
                return real_stderr.flush()

            def __getattr__(self, name):
                # Everything else (encoding, isatty, fileno, ...) comes from the real stream.
                return getattr(real_stderr, name)

        sys.stderr = _Tee()

        # Close the log file at interpreter exit, tolerating an already-closed handle.
        atexit.register(_best_effort, fh.close)
    except Exception:
        # Never let logging setup kill the invocation.
        pass
|
|
2545
|
+
|
|
2546
|
+
|
|
2547
|
+
# Script entry point: install the stderr tee first so that anything main()
# writes to stderr, including an uncaught traceback, is also kept on disk.
if __name__ == "__main__":
    _install_stderr_tee()
    main()
|