@m13v/s4l 1.6.197-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1314 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +497 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan GitHub issues for new replies to our comments.
|
|
3
|
+
|
|
4
|
+
Finds all issues we've commented on, checks for new comments from other users,
|
|
5
|
+
inserts into `replies` table as 'pending' or 'skipped'.
|
|
6
|
+
|
|
7
|
+
Works by scanning via thread_url + gh API - doesn't require our_url to be set.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
import time
|
|
16
|
+
|
|
17
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
18
|
+
from http_api import api_get, api_post
|
|
19
|
+
|
|
20
|
+
# Replies with fewer than this many whitespace-separated words are recorded
# as 'skipped' (skip_reason "too_short") instead of 'pending'.
MIN_WORDS = 5
# JSON config file; read by load_config(), which returns {} when it is absent.
CONFIG_PATH = os.path.expanduser("~/social-autoposter/config.json")

# NOTE: posts/replies for GitHub live under platform='github' in the DB; the
# 'github_issues' value used here matches zero rows, so Phase A has long been a
# no-op. Preserved verbatim during the HTTP-only migration to avoid an
# unrequested volume/cost change (switching to 'github' would suddenly scan all
# ~6.8k GitHub posts). If you want to actually scan GitHub replies, flip
# SCAN_PLATFORM to 'github' deliberately.
SCAN_PLATFORM = "github_issues"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_config():
    """Return the parsed JSON config stored at CONFIG_PATH.

    Returns an empty dict when the file does not exist. A file that exists
    but is not valid JSON raises from ``json.load``.
    """
    if not os.path.exists(CONFIG_PATH):
        return {}
    with open(CONFIG_PATH) as config_file:
        parsed = json.load(config_file)
    return parsed
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def word_count(text):
    """Return the number of whitespace-separated words in *text*.

    Falsy input (``None`` or the empty string) counts as zero words.
    """
    if not text:
        return 0
    words = text.split()
    return len(words)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def main():
    """Discover new replies on the GitHub issues we have commented on.

    Steps:
      1. Load the config and fetch the active posts for SCAN_PLATFORM from
         /api/v1/posts.
      2. Map every ``https://github.com/<owner>/<repo>/issues/<n>`` thread_url
         to its post id, dropping repos listed under
         ``exclusions.github_repos`` in the config.
      3. For each remaining issue, list its comments through ``gh api`` and
         keep the ones written by other users after our first comment.
      4. POST each of those to /api/v1/replies as 'pending', or as 'skipped'
         when the author is excluded or the body has fewer than MIN_WORDS
         words.

    Progress and a final summary are printed to stdout; nothing is returned.
    """
    config = load_config()
    github_user = config.get("accounts", {}).get("github", {}).get("username", "m13v")
    # GitHub logins are case-insensitive, so compare lowercased on both sides
    # (the exclusion check below already lowercases the author).
    our_login = (github_user or "").lower()

    # Get all active GitHub posts we've commented on. The posts GET returns id +
    # thread_url together, so we capture the post_id map here and skip the
    # per-thread lookup the direct-SQL version used to do.
    resp = api_get("/api/v1/posts",
                   query={"platform": SCAN_PLATFORM, "status": "active", "limit": 500})
    rows = ((resp or {}).get("data") or {}).get("posts") or []

    issue_url_re = re.compile(r"https://github\.com/([^/]+/[^/]+)/issues/(\d+)")
    issues = {}
    post_id_by_url = {}
    for row in rows:
        url = row.get("thread_url")
        if not url:
            continue
        # First post per thread_url wins (mirrors the old "use the first one").
        post_id_by_url.setdefault(url, row.get("id"))
        match = issue_url_re.match(url)
        if match:
            repo, issue_num = match.group(1), match.group(2)
            issues[f"{repo}/{issue_num}"] = url

    # Load exclusions
    exclusions = config.get("exclusions", {})
    excluded_authors = {a.lower() for a in exclusions.get("authors", [])}
    excluded_repos = {r.lower() for r in exclusions.get("github_repos", [])}

    # Filter out issues from excluded repos (substring match on "owner/repo/n").
    issues = {k: v for k, v in issues.items()
              if not any(repo_pat in k.lower() for repo_pat in excluded_repos)}

    print(f"Scanning {len(issues)} GitHub issues for replies...")

    discovered = 0
    skipped = 0
    errors = 0

    # jq filter: project each comment down to the five fields used below.
    jq_filter = ("[.[] | {id: .id, user: .user.login, body: .body, "
                 "url: .html_url, created: .created_at}]")

    for issue_key, thread_url in issues.items():
        repo, issue_num = issue_key.rsplit("/", 1)

        # post_id captured alongside thread_url in the posts GET above.
        post_id = post_id_by_url.get(thread_url)
        if not post_id:
            continue

        # Fetch the comments on the issue. per_page=100 is the REST API
        # maximum; without it only the default first page (30) is returned and
        # later replies are never seen. Issues with more than 100 comments are
        # still truncated to the first page.
        try:
            result = subprocess.run(
                ["gh", "api", f"repos/{repo}/issues/{issue_num}/comments?per_page=100",
                 "--jq", jq_filter],
                capture_output=True, text=True, timeout=15
            )
            if result.returncode != 0:
                detail = ((result.stderr or "").strip()
                          or f"gh exited with status {result.returncode}")
                print(f"  ERROR scanning {issue_key}: {detail}")
                errors += 1
                continue
            comments = json.loads(result.stdout) if result.stdout.strip() else []
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            # OSError: gh missing / not executable; SubprocessError: timeout;
            # ValueError: output was not valid JSON.
            print(f"  ERROR scanning {issue_key}: {e}")
            errors += 1
            continue

        # Split into our comments and everyone else's. `user` is null when
        # jq finds no login on a comment, so normalise it to "".
        our_comments = []
        other_comments = []
        for c in comments:
            login = (c.get("user") or "").lower()
            if our_login and login == our_login:
                our_comments.append(c)
            else:
                other_comments.append(c)

        if not our_comments:
            continue

        # Get the timestamp of our first comment. created_at values are
        # compared as strings, as in the original implementation.
        our_first_ts = min(c["created"] for c in our_comments)

        # Only look at comments after our first comment
        replies_to_us = [c for c in other_comments if c["created"] > our_first_ts]

        for comment in replies_to_us:
            # `or ""` (not a .get default) because the key is present with a
            # null value when the field is empty upstream.
            author = comment.get("user") or ""
            body = comment.get("body") or ""
            comment_id = str(comment.get("id", ""))
            comment_url = comment.get("url") or ""

            # Determine status + skip_reason up front; the (platform,
            # their_comment_id) UNIQUE index on the API handles "already
            # tracked" (returns 409), so the old COUNT pre-check is gone.
            words = word_count(body)
            if author.lower() in excluded_authors:
                status, skip_reason = "skipped", "excluded_author"
            elif words < MIN_WORDS:
                status, skip_reason = "skipped", f"too_short ({words} words)"
            else:
                status, skip_reason = "pending", None

            payload = {
                "post_id": post_id,
                "platform": SCAN_PLATFORM,
                "their_comment_id": comment_id,
                "their_author": author,
                "their_content": body,
                "their_comment_url": comment_url,
                "depth": 1,
                "status": status,
            }
            if skip_reason:
                payload["skip_reason"] = skip_reason

            resp = api_post("/api/v1/replies", payload, ok_on_conflict=True)
            if not (resp or {}).get("ok"):
                # 409 duplicate_reply: already tracked from a prior run. Skip.
                continue
            reply = ((resp or {}).get("data") or {}).get("reply")
            if reply is None:
                # Blocklist / velocity gate dropped this fresh pending row.
                continue
            if status == "skipped":
                skipped += 1
            else:
                discovered += 1
                print(f"  NEW: @{author} on {issue_key}: {body[:80]}...")

        time.sleep(1)  # Light rate limiting

    print(f"\nGitHub scan complete: {discovered} new pending, {skipped} skipped, {errors} errors")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan Instagram Graph API for new comments on our posts.
|
|
3
|
+
|
|
4
|
+
For each enabled Instagram account in config.json (matt_diak, matthewheartful,
|
|
5
|
+
omidotme), this:
|
|
6
|
+
|
|
7
|
+
  1. Fetches /api/v1/posts?platform=instagram and filters the returned rows
     client-side by our_account == <username> to build a {shortcode: post_id}
     map of our DB-tracked IG posts.
|
|
9
|
+
2. Lists /me/media for the account (reuses the same Graph API call shape
|
|
10
|
+
update_instagram_stats.py uses).
|
|
11
|
+
3. For each media item present in our DB, calls /{media-id}/comments with
|
|
12
|
+
the replies sub-resource expanded.
|
|
13
|
+
4. Inserts each comment (and its nested replies) into the `replies` table
|
|
14
|
+
via reply_insert.insert_reply(). Server-side UNIQUE (platform,
|
|
15
|
+
their_comment_id) handles dedup; this script never SELECTs.
|
|
16
|
+
|
|
17
|
+
Filters (mirrors scan_reddit_replies / scan_github_replies behavior):
|
|
18
|
+
- Skip comments whose author is in config.exclusions.authors
|
|
19
|
+
- Skip our own usernames (matt_diak / matthewheartful / omidotme) so we
|
|
20
|
+
don't try to reply to ourselves
|
|
21
|
+
- Skip backfill-old comments (older than BACKFILL_HOURS) with
|
|
22
|
+
status='skipped' / skip_reason='backfill_old'
|
|
23
|
+
- Skip too-short comments (< MIN_WORDS) with skip_reason='too_short'
|
|
24
|
+
|
|
25
|
+
This is discovery-only. Posting replies back to Instagram lives in a separate
|
|
26
|
+
engage script (Phase 2, not built yet); for now new rows surface in the
|
|
27
|
+
dashboard replies feed as platform='instagram', status='pending'.
|
|
28
|
+
|
|
29
|
+
Usage:
|
|
30
|
+
python3 scripts/scan_instagram_comments.py [--quiet] [--limit N]
|
|
31
|
+
[--account NAME]
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import argparse
|
|
37
|
+
import json
|
|
38
|
+
import os
|
|
39
|
+
import sys
|
|
40
|
+
import time
|
|
41
|
+
import urllib.error
|
|
42
|
+
import urllib.parse
|
|
43
|
+
import urllib.request
|
|
44
|
+
from datetime import datetime, timezone
|
|
45
|
+
from pathlib import Path
|
|
46
|
+
|
|
47
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
48
|
+
from http_api import api_get
|
|
49
|
+
from reply_insert import insert_reply as _insert_reply
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# KEY=VALUE credentials file parsed by load_ig_env().
IG_ENV_PATH = Path.home() / "instagram-graph-api" / ".env"
# Base URL (including API version) for every Graph API request in this file.
GRAPH = "https://graph.instagram.com/v22.0"
# config.json in the directory above this script's directory.
SA_CONFIG = Path(__file__).resolve().parent.parent / "config.json"

# Discovery filters
BACKFILL_HOURS = 48
MIN_WORDS = 5
# Per-Graph-API-call sleep so we stay polite under the 60/hr, 4800/day caps.
# 3 accounts * ~10 media * (1 list + 1 comments call) = ~60 calls/cycle;
# at 0.2s sleep that's ~12s per cycle, well inside 30-minute scheduling.
GRAPH_SLEEP_SECS = 0.2
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ── env / config ──────────────────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
def load_ig_env() -> dict:
    """Parse the KEY=VALUE file at IG_ENV_PATH into a dict.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Each remaining line is split on its first ``=``; key and value
    are whitespace-stripped (quotes are kept as-is). Returns ``{}`` when the
    file does not exist.
    """
    if not IG_ENV_PATH.exists():
        return {}
    pairs = {}
    for raw_line in IG_ENV_PATH.read_text().splitlines():
        entry = raw_line.strip()
        is_blank_or_comment = not entry or entry.startswith("#")
        if is_blank_or_comment or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        pairs[key.strip()] = value.strip()
    return pairs
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def load_config() -> dict:
    """Return the parsed JSON at SA_CONFIG, or ``{}`` when the file is missing.

    Invalid JSON is not handled here and propagates from ``json.loads``.
    """
    try:
        raw_text = SA_CONFIG.read_text()
    except FileNotFoundError:
        return {}
    return json.loads(raw_text)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def resolve_account_creds(account_name: str, ig_env: dict, accounts_cfg: list):
    """Return ``(ig_user_id, long_token)`` for *account_name*.

    The first entry of *accounts_cfg* whose ``username`` equals
    *account_name* (case-insensitive) names the *ig_env* keys to read via
    ``ig_user_id_env`` / ``ig_long_token_env`` (defaulting to ``IG_USER_ID``
    / ``IG_LONG_TOKEN``). If no entry matches, or either value is missing
    or empty, the default ``IG_USER_ID`` / ``IG_LONG_TOKEN`` values are
    returned instead; each may be ``None``.
    """
    for entry in accounts_cfg:
        if entry.get("username", "").lower() != account_name.lower():
            continue
        user_id_key = entry.get("ig_user_id_env", "IG_USER_ID")
        token_key = entry.get("ig_long_token_env", "IG_LONG_TOKEN")
        user_id = ig_env.get(user_id_key)
        long_token = ig_env.get(token_key)
        if user_id and long_token:
            return user_id, long_token
        # Only the first matching entry is consulted.
        break
    return ig_env.get("IG_USER_ID"), ig_env.get("IG_LONG_TOKEN")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# ── Graph API helpers ─────────────────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
def graph_get(path: str, token: str, **params):
    """GET ``{GRAPH}/{path}`` and return the decoded JSON body.

    *params* become the URL-encoded query string, with ``access_token`` set
    to *token*. The request uses a 20 second timeout; errors raised by
    ``urllib.request.urlopen`` propagate to the caller.
    """
    query = urllib.parse.urlencode({**params, "access_token": token})
    endpoint = f"{GRAPH}/{path}?{query}"
    with urllib.request.urlopen(endpoint, timeout=20) as response:
        raw_body = response.read()
    return json.loads(raw_body)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def shortcode_from_url(url: str | None) -> str | None:
    """Return the shortcode embedded in an Instagram permalink, else None.

    The shortcode is the path segment that follows ``/reel/``, ``/p/`` or
    ``/tv/``:

        https://www.instagram.com/reel/DYkkj8RDo9P/ -> DYkkj8RDo9P
    """
    import re

    found = re.search(r"/(?:reel|p|tv)/([A-Za-z0-9_-]+)", url or "")
    if found is None:
        return None
    return found.group(1)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def fetch_media_list(ig_user_id: str, token: str, max_pages: int = 5) -> list[dict]:
    """Page through ``/{ig_user_id}/media`` and return the raw media items.

    Args:
        ig_user_id: Graph API user id whose media is listed.
        token: access token sent as the ``access_token`` query parameter.
        max_pages: upper bound on the number of pages fetched (100 items are
            requested per page).

    Returns:
        The concatenated ``data`` arrays of every fetched page. Each item is
        requested with ``id, media_type, media_product_type, permalink,
        timestamp``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises; errors are not wrapped
        here.
    """
    # URL-encode the query, as graph_get() does, rather than interpolating
    # the raw token into the URL.
    query = urllib.parse.urlencode({
        "fields": "id,media_type,media_product_type,permalink,timestamp",
        "limit": 100,
        "access_token": token,
    })
    url = f"{GRAPH}/{ig_user_id}/media?{query}"

    out = []
    pages = 0
    while url and pages < max_pages:
        with urllib.request.urlopen(url, timeout=20) as r:
            data = json.loads(r.read())
        out.extend(data.get("data") or [])
        # `paging.next` is a ready-to-use URL for the following page; it is
        # absent on the last page, which ends the loop.
        url = (data.get("paging") or {}).get("next")
        pages += 1
        # Pause only when another request will actually follow.
        if url and pages < max_pages:
            time.sleep(GRAPH_SLEEP_SECS)
    return out
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def fetch_comments(media_id: str, token: str) -> list[dict]:
    """Return the top-level comments of a media item.

    Each comment is requested with ``id, username, text, timestamp`` plus a
    nested ``replies`` sub-resource carrying the same four fields (Graph API
    caps the sub-list at 25 by default; that matches typical traffic on our
    posts). A single request with ``limit=50`` is made; further pages are
    not followed.

    Raises:
        GraphApiError: when the Graph API answers with an HTTP error status.
            The message carries the status code and the first 200 characters
            of the response body, and the original ``HTTPError`` is attached
            as ``__cause__``.
    """
    fields = (
        "id,username,text,timestamp,"
        "replies{id,username,text,timestamp}"
    )
    try:
        data = graph_get(f"{media_id}/comments", token, fields=fields, limit=50)
    except urllib.error.HTTPError as e:
        body = e.read().decode(errors="replace")[:200]
        raise GraphApiError(f"HTTP {e.code} on /{media_id}/comments: {body}") from e
    return data.get("data") or []
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class GraphApiError(Exception):
    """Raised when a Graph API request fails with an HTTP error status."""
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ── posts lookup ──────────────────────────────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
def fetch_posts_map(account_username: str) -> dict[str, int]:
    """Build ``{shortcode: post_id}`` for one account's Instagram posts.

    Requests up to 500 ``platform='instagram'`` rows from /api/v1/posts and
    keeps, client-side, those whose ``our_account`` equals
    *account_username* (case-insensitive). The shortcode is taken from each
    row's ``our_url``; rows without a recognisable shortcode are dropped.
    """
    resp = api_get(
        "/api/v1/posts",
        query={"platform": "instagram", "limit": 500},
    )
    rows = ((resp or {}).get("data") or {}).get("posts") or []

    mapping: dict[str, int] = {}
    for post in rows:
        owner = (post.get("our_account") or "").lower()
        if owner == account_username.lower():
            shortcode = shortcode_from_url(post.get("our_url"))
            if shortcode:
                mapping[shortcode] = int(post.get("id"))
    return mapping
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ── parse / classify ──────────────────────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
def parse_ts(ts: str | None) -> float:
    """Parse an Instagram ISO-8601 timestamp into a unix timestamp.

    Accepts the ``+0000`` offset Instagram emits, a trailing ``Z``, or any
    offset ``datetime.fromisoformat`` understands. A timestamp without an
    offset is interpreted as UTC rather than as local time.

    Returns:
        Seconds since the epoch, or ``0.0`` when *ts* is empty or cannot be
        parsed. Note that ``0.0`` is falsy: a caller that tests
        ``created and created < cutoff`` will skip the age check for such
        values rather than treat them as old.
    """
    if not ts:
        return 0.0
    try:
        # Instagram returns +0000 (no colon); normalise it, and a trailing Z,
        # to the +00:00 form every supported fromisoformat() accepts.
        s = ts.replace("+0000", "+00:00")
        if s.endswith(("Z", "z")):
            s = s[:-1] + "+00:00"
        parsed = datetime.fromisoformat(s)
        if parsed.tzinfo is None:
            # Without this, .timestamp() would assume the machine's timezone.
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed.timestamp()
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Non-string input or an unparseable / out-of-range value.
        return 0.0
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def word_count(text: str | None) -> int:
    """Return the number of whitespace-separated words in *text* (0 for None or empty)."""
    if not text:
        return 0
    words = text.split()
    return len(words)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def build_comment_url(shortcode: str, comment_id: str) -> str:
    """Return the ``/p/<shortcode>/c/<comment_id>/`` URL built for a comment."""
    return "https://www.instagram.com/p/{}/c/{}/".format(shortcode, comment_id)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# ── main scan loop ────────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
class IgCommentScanner:
|
|
210
|
+
def __init__(
|
|
211
|
+
self,
|
|
212
|
+
account_username: str,
|
|
213
|
+
ig_user_id: str,
|
|
214
|
+
token: str,
|
|
215
|
+
posts_map: dict[str, int],
|
|
216
|
+
excluded_authors: set[str],
|
|
217
|
+
quiet: bool = False,
|
|
218
|
+
media_limit: int | None = None,
|
|
219
|
+
):
|
|
220
|
+
self.account = account_username
|
|
221
|
+
self.ig_user_id = ig_user_id
|
|
222
|
+
self.token = token
|
|
223
|
+
self.posts_map = posts_map
|
|
224
|
+
self.excluded = excluded_authors
|
|
225
|
+
self.quiet = quiet
|
|
226
|
+
self.media_limit = media_limit
|
|
227
|
+
|
|
228
|
+
self.discovered = 0
|
|
229
|
+
self.backfill_skipped = 0
|
|
230
|
+
self.too_short_skipped = 0
|
|
231
|
+
self.excluded_skipped = 0
|
|
232
|
+
self.already_tracked = 0
|
|
233
|
+
self.media_checked = 0
|
|
234
|
+
self.media_no_post = 0
|
|
235
|
+
self.comments_seen = 0
|
|
236
|
+
|
|
237
|
+
def log(self, msg: str):
|
|
238
|
+
if not self.quiet:
|
|
239
|
+
print(msg)
|
|
240
|
+
|
|
241
|
+
def _insert(
|
|
242
|
+
self,
|
|
243
|
+
post_id: int,
|
|
244
|
+
comment_id: str,
|
|
245
|
+
author: str,
|
|
246
|
+
content: str,
|
|
247
|
+
comment_url: str,
|
|
248
|
+
depth: int,
|
|
249
|
+
status: str,
|
|
250
|
+
skip_reason: str | None = None,
|
|
251
|
+
):
|
|
252
|
+
result = _insert_reply(
|
|
253
|
+
None, post_id, "instagram", comment_id, author, content, comment_url,
|
|
254
|
+
parent_reply_id=None, depth=depth, status=status, skip_reason=skip_reason,
|
|
255
|
+
)
|
|
256
|
+
if result is None:
|
|
257
|
+
self.already_tracked += 1
|
|
258
|
+
return
|
|
259
|
+
if result == "pending":
|
|
260
|
+
self.discovered += 1
|
|
261
|
+
elif result == "skipped":
|
|
262
|
+
if skip_reason == "backfill_old":
|
|
263
|
+
self.backfill_skipped += 1
|
|
264
|
+
elif skip_reason and skip_reason.startswith("too_short"):
|
|
265
|
+
self.too_short_skipped += 1
|
|
266
|
+
elif skip_reason == "excluded_author":
|
|
267
|
+
self.excluded_skipped += 1
|
|
268
|
+
|
|
269
|
+
def _classify_and_insert(
|
|
270
|
+
self,
|
|
271
|
+
post_id: int,
|
|
272
|
+
shortcode: str,
|
|
273
|
+
comment: dict,
|
|
274
|
+
backfill_cutoff: float,
|
|
275
|
+
depth: int,
|
|
276
|
+
):
|
|
277
|
+
comment_id = str(comment.get("id") or "")
|
|
278
|
+
if not comment_id:
|
|
279
|
+
return
|
|
280
|
+
self.comments_seen += 1
|
|
281
|
+
author = comment.get("username") or ""
|
|
282
|
+
content = comment.get("text") or ""
|
|
283
|
+
comment_url = build_comment_url(shortcode, comment_id)
|
|
284
|
+
created = parse_ts(comment.get("timestamp"))
|
|
285
|
+
|
|
286
|
+
if author.lower() in self.excluded:
|
|
287
|
+
self._insert(
|
|
288
|
+
post_id, comment_id, author, content, comment_url, depth,
|
|
289
|
+
status="skipped", skip_reason="excluded_author",
|
|
290
|
+
)
|
|
291
|
+
return
|
|
292
|
+
|
|
293
|
+
if created and created < backfill_cutoff:
|
|
294
|
+
self._insert(
|
|
295
|
+
post_id, comment_id, author, content, comment_url, depth,
|
|
296
|
+
status="skipped", skip_reason="backfill_old",
|
|
297
|
+
)
|
|
298
|
+
return
|
|
299
|
+
|
|
300
|
+
wc = word_count(content)
|
|
301
|
+
if wc < MIN_WORDS:
|
|
302
|
+
self._insert(
|
|
303
|
+
post_id, comment_id, author, content, comment_url, depth,
|
|
304
|
+
status="skipped", skip_reason=f"too_short ({wc} words)",
|
|
305
|
+
)
|
|
306
|
+
return
|
|
307
|
+
|
|
308
|
+
self._insert(
|
|
309
|
+
post_id, comment_id, author, content, comment_url, depth,
|
|
310
|
+
status="pending", skip_reason=None,
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
def scan(self):
|
|
314
|
+
self.log(f"[scan-ig-comments] account={self.account} posts_in_db={len(self.posts_map)}")
|
|
315
|
+
if not self.posts_map:
|
|
316
|
+
self.log(f"[scan-ig-comments] no instagram posts in DB for account={self.account}; nothing to scan")
|
|
317
|
+
return
|
|
318
|
+
|
|
319
|
+
try:
|
|
320
|
+
media_items = fetch_media_list(self.ig_user_id, self.token)
|
|
321
|
+
except urllib.error.HTTPError as e:
|
|
322
|
+
body = e.read().decode(errors="replace")[:200]
|
|
323
|
+
self.log(f"[scan-ig-comments] /me/media failed for {self.account}: HTTP {e.code} {body}")
|
|
324
|
+
return
|
|
325
|
+
except Exception as e:
|
|
326
|
+
self.log(f"[scan-ig-comments] /me/media failed for {self.account}: {e}")
|
|
327
|
+
return
|
|
328
|
+
|
|
329
|
+
self.log(f"[scan-ig-comments] /me/media returned {len(media_items)} items")
|
|
330
|
+
backfill_cutoff = time.time() - BACKFILL_HOURS * 3600
|
|
331
|
+
|
|
332
|
+
checked = 0
|
|
333
|
+
for item in media_items:
|
|
334
|
+
if self.media_limit and checked >= self.media_limit:
|
|
335
|
+
break
|
|
336
|
+
permalink = item.get("permalink")
|
|
337
|
+
shortcode = shortcode_from_url(permalink)
|
|
338
|
+
if not shortcode:
|
|
339
|
+
continue
|
|
340
|
+
post_id = self.posts_map.get(shortcode)
|
|
341
|
+
if not post_id:
|
|
342
|
+
self.media_no_post += 1
|
|
343
|
+
continue
|
|
344
|
+
|
|
345
|
+
media_id = item.get("id")
|
|
346
|
+
try:
|
|
347
|
+
comments = fetch_comments(media_id, self.token)
|
|
348
|
+
except GraphApiError as e:
|
|
349
|
+
self.log(f"[scan-ig-comments] media={media_id} shortcode={shortcode} comments fetch failed: {e}")
|
|
350
|
+
continue
|
|
351
|
+
|
|
352
|
+
self.media_checked += 1
|
|
353
|
+
checked += 1
|
|
354
|
+
self.log(
|
|
355
|
+
f"[scan-ig-comments] media={media_id} shortcode={shortcode} "
|
|
356
|
+
f"top_level_comments={len(comments)}"
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
for c in comments:
|
|
360
|
+
self._classify_and_insert(post_id, shortcode, c, backfill_cutoff, depth=1)
|
|
361
|
+
# Nested replies (replies to top-level comments). Author may
|
|
362
|
+
# be us (we already replied) or someone else (we got a reply
|
|
363
|
+
# to OUR reply). The excluded-author filter inside
|
|
364
|
+
# _classify_and_insert handles the first case.
|
|
365
|
+
replies = ((c.get("replies") or {}).get("data") or [])
|
|
366
|
+
for r in replies:
|
|
367
|
+
self._classify_and_insert(post_id, shortcode, r, backfill_cutoff, depth=2)
|
|
368
|
+
|
|
369
|
+
time.sleep(GRAPH_SLEEP_SECS)
|
|
370
|
+
|
|
371
|
+
def summary(self) -> dict:
|
|
372
|
+
return {
|
|
373
|
+
"account": self.account,
|
|
374
|
+
"media_checked": self.media_checked,
|
|
375
|
+
"media_no_post_in_db": self.media_no_post,
|
|
376
|
+
"comments_seen": self.comments_seen,
|
|
377
|
+
"discovered": self.discovered,
|
|
378
|
+
"backfill_skipped": self.backfill_skipped,
|
|
379
|
+
"too_short_skipped": self.too_short_skipped,
|
|
380
|
+
"excluded_skipped": self.excluded_skipped,
|
|
381
|
+
"already_tracked": self.already_tracked,
|
|
382
|
+
}
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def main():
    """CLI entry point: scan the configured Instagram accounts for comments.

    Flags:
        --quiet    suppress per-account progress output.
        --limit    cap the number of media items inspected per account.
        --account  scan only the named account (matched case-insensitively);
                   without it, every account not marked ``enabled: false``
                   is scanned.

    Always finishes by printing a single ``SUMMARY:`` line with the
    aggregated counters.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--quiet", action="store_true")
    parser.add_argument("--limit", type=int, default=None,
                        help="Cap media items inspected per account (debug)")
    parser.add_argument("--account", default=None,
                        help="Scan only this account (default: all enabled)")
    args = parser.parse_args()

    ig_env = load_ig_env()
    cfg = load_config()
    accounts_cfg = ((cfg.get("instagram") or {}).get("accounts") or [])
    exclusions = cfg.get("exclusions") or {}
    base_excluded = {a.lower() for a in (exclusions.get("authors") or [])}
    # Always exclude our own usernames so we don't reply to ourselves.
    # ``or ""`` guards against an explicit ``"username": null`` in the config,
    # which ``.get("username", "")`` would pass through as None.
    own_usernames = {(a.get("username") or "").lower() for a in accounts_cfg if a.get("username")}
    # Identical for every account, so build it once rather than per iteration.
    excluded_authors = base_excluded | own_usernames

    if args.account:
        wanted = args.account.lower()
        accounts_to_scan = [a for a in accounts_cfg
                            if (a.get("username") or "").lower() == wanted]
    else:
        accounts_to_scan = [a for a in accounts_cfg if a.get("enabled", True)]

    if not accounts_to_scan:
        print("[scan-ig-comments] no instagram accounts to scan; exiting")
        print("SUMMARY:DISCOVERED=0 SKIPPED=0 CHECKED=0 ALREADY=0 ACCOUNTS=0")
        return

    # Counters copied from each scanner's summary into the grand totals.
    counter_keys = (
        "discovered",
        "backfill_skipped",
        "too_short_skipped",
        "excluded_skipped",
        "already_tracked",
        "media_checked",
        "comments_seen",
    )
    totals = dict.fromkeys(counter_keys, 0)
    totals["accounts"] = 0

    for account_cfg in accounts_to_scan:
        username = account_cfg.get("username", "")
        if not username:
            continue
        uid, tok = resolve_account_creds(username, ig_env, accounts_cfg)
        if not uid or not tok:
            print(f"[scan-ig-comments] missing creds for account={username}; skipping")
            continue

        try:
            posts_map = fetch_posts_map(username)
        except Exception as e:
            # Best effort: one account's lookup failure must not stop the rest.
            print(f"[scan-ig-comments] posts lookup failed for {username}: {e}")
            continue

        scanner = IgCommentScanner(
            username, uid, tok, posts_map, excluded_authors,
            quiet=args.quiet, media_limit=args.limit,
        )
        scanner.scan()
        s = scanner.summary()
        if not args.quiet:
            print(
                f"[scan-ig-comments] account={username} done: "
                f"media_checked={s['media_checked']} comments_seen={s['comments_seen']} "
                f"discovered={s['discovered']} "
                f"backfill_skipped={s['backfill_skipped']} "
                f"too_short_skipped={s['too_short_skipped']} "
                f"excluded_skipped={s['excluded_skipped']} "
                f"already_tracked={s['already_tracked']}"
            )

        for key in counter_keys:
            totals[key] += s[key]
        totals["accounts"] += 1

    skipped_total = (
        totals["backfill_skipped"]
        + totals["too_short_skipped"]
        + totals["excluded_skipped"]
    )

    print(
        f"SUMMARY:DISCOVERED={totals['discovered']} SKIPPED={skipped_total} "
        f"CHECKED={totals['media_checked']} ALREADY={totals['already_tracked']} "
        f"ACCOUNTS={totals['accounts']}"
    )
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
# Run the scanner only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|