@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Social Autoposter - X/Twitter thread follow-up scanner
|
|
3
|
+
# Revisits our recent X replies and captures depth-2+ public follow-ups
|
|
4
|
+
# that the /notifications scraper misses (when @-tag is dropped in nested replies).
|
|
5
|
+
# Companion to scan_twitter_mentions_browser.py (run via engage-twitter.sh).
|
|
6
|
+
# Scheduled overnight by launchd (1:14 AM only). Waits up to 30min for the
|
|
7
|
+
# twitter-browser lock to free, then yields. Single overnight firing chosen
|
|
8
|
+
# because twitter-cycle parallel firings keep the lock busy during waking
|
|
9
|
+
# hours; the 13:14 (1:14 PM) firing was dropped 2026-05-19 after weeks of "skipping"
|
|
10
|
+
# bails (acquire_lock timeout=0 was the original yield strategy, replaced
|
|
11
|
+
# with a 1800s wait that lets the scan run when twitter-cycle is quieter).
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
set -euo pipefail
|
|
15
|
+
|
|
16
|
+
# Bootstrap log paths early so the singleton-cleanup output below gets captured
|
|
17
|
+
# in the same log file the rest of the run uses.
|
|
18
|
+
LOG_DIR="$HOME/social-autoposter/skill/logs"
|
|
19
|
+
mkdir -p "$LOG_DIR"
|
|
20
|
+
LOG_FILE="$LOG_DIR/scan-twitter-followups-$(date +%Y-%m-%d_%H%M%S).log"
|
|
21
|
+
|
|
22
|
+
# Browser-profile lock shared with all twitter pipelines.
|
|
23
|
+
source "$(dirname "$0")/lock.sh"
|
|
24
|
+
# Harness-only browser bootstrap (twitter-agent path fully removed 2026-05-19).
|
|
25
|
+
# scan_twitter_thread_followups.py uses twitter_browser.py functions, which
|
|
26
|
+
# honor TWITTER_CDP_URL exported by the lib.
|
|
27
|
+
source "$(dirname "$0")/lib/twitter-backend.sh"
|
|
28
|
+
acquire_lock "twitter-browser" 1800
|
|
29
|
+
ensure_twitter_browser_for_backend 2>&1 | tee -a "$LOG_FILE"
|
|
30
|
+
acquire_lock "scan-twitter-followups" 0
|
|
31
|
+
|
|
32
|
+
[ -f "$HOME/social-autoposter/.env" ] && source "$HOME/social-autoposter/.env"
|
|
33
|
+
|
|
34
|
+
REPO_DIR="$HOME/social-autoposter"
|
|
35
|
+
# (LOG_DIR/LOG_FILE bootstrapped at top of script.)
|
|
36
|
+
|
|
37
|
+
echo "=== Scan Twitter Follow-ups Run: $(date) ===" | tee -a "$LOG_FILE"
|
|
38
|
+
START_TS=$(date +%s)
|
|
39
|
+
|
|
40
|
+
DAYS="${FOLLOWUP_DAYS:-14}"
|
|
41
|
+
MAX_URLS="${FOLLOWUP_MAX_URLS:-40}"
|
|
42
|
+
SCROLL_COUNT="${FOLLOWUP_SCROLLS:-3}"
|
|
43
|
+
|
|
44
|
+
PYTHONUNBUFFERED=1 python3 "$REPO_DIR/scripts/scan_twitter_thread_followups.py" \
|
|
45
|
+
--days "$DAYS" --max-urls "$MAX_URLS" --scroll-count "$SCROLL_COUNT" \
|
|
46
|
+
2>&1 | tee -a "$LOG_FILE" || true
|
|
47
|
+
|
|
48
|
+
ELAPSED=$(( $(date +%s) - START_TS ))
|
|
49
|
+
# grep -c prints "0" AND exits 1 on zero matches, so `|| echo 0` was
|
|
50
|
+
# appending a second "0" and making FOUND multiline, which silently broke
|
|
51
|
+
# log_run.py. Use `|| FOUND=0` instead: it assigns rather than appends, so
|
|
52
|
+
# FOUND stays a single "0" on zero matches and also when the file is unreadable.
|
|
53
|
+
FOUND=$(grep -c "NEW follow-up:" "$LOG_FILE" 2>/dev/null) || FOUND=0
|
|
54
|
+
python3 "$REPO_DIR/scripts/log_run.py" --script "scan_twitter_followups" --posted "$FOUND" --skipped 0 --failed 0 --cost 0 --elapsed "$ELAPSED" || true
|
|
55
|
+
|
|
56
|
+
echo "=== Scan Twitter Follow-ups complete: $(date) (elapsed ${ELAPSED}s, found ${FOUND}) ===" | tee -a "$LOG_FILE"
|
|
57
|
+
find "$LOG_DIR" -name "scan-twitter-followups-*.log" -mtime +7 -delete 2>/dev/null || true
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# social-autoposter-update.sh — standalone self-updater for SHIPPED client
|
|
3
|
+
# installs. Driven by the launchd/systemd job com.m13v.social-autoposter-update
|
|
4
|
+
# (daily) and reused by the per-cycle guard (run-cycle-update-guard.sh).
|
|
5
|
+
#
|
|
6
|
+
# WHAT IT DOES
|
|
7
|
+
# 1. Refuses to run on a dev/source checkout (presence of .git). A dev box
|
|
8
|
+
# edits code in place; `npx social-autoposter@latest update` would clobber
|
|
9
|
+
# the working tree. This is the single most important guard here.
|
|
10
|
+
# 2. Compares the installed package version to the latest published on npm.
|
|
11
|
+
# 3. If behind, runs `npx -y social-autoposter@latest update`, which pulls the
|
|
12
|
+
# latest tarball, copies it over the install, re-runs install.mjs (re-stamps
|
|
13
|
+
# dist/version.json + re-registers the MCP). The running MCP keeps the old
|
|
14
|
+
# code until the client reconnects; the next headless cycle picks it up.
|
|
15
|
+
#
|
|
16
|
+
# Safe to call frequently: the version check is one `npm view` call; the heavy
|
|
17
|
+
# `npx update` only fires when actually behind.
|
|
18
|
+
#
|
|
19
|
+
# Exit codes: 0 = up to date OR updated OK OR skipped (dev box / offline);
|
|
20
|
+
# non-zero only when the update command itself failed.
|
|
21
|
+
|
|
22
|
+
set -u
|
|
23
|
+
|
|
24
|
+
REPO_DIR="${S4L_REPO_DIR:-$HOME/social-autoposter}"
|
|
25
|
+
LOG_DIR="$REPO_DIR/skill/logs"
|
|
26
|
+
LOG="$LOG_DIR/self-update.log"
|
|
27
|
+
mkdir -p "$LOG_DIR" 2>/dev/null || true
|
|
28
|
+
|
|
29
|
+
log() { echo "[$(date '+%Y-%m-%dT%H:%M:%S%z')] $*" | tee -a "$LOG" >&2; }
|
|
30
|
+
|
|
31
|
+
# --- guard 1: never self-update a dev/source checkout -----------------------
|
|
32
|
+
if [ -d "$REPO_DIR/.git" ]; then
|
|
33
|
+
log "skip: $REPO_DIR is a git checkout (dev mode); self-update disabled to avoid clobbering the working tree."
|
|
34
|
+
exit 0
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
# --- resolve installed version ----------------------------------------------
|
|
38
|
+
installed=""
|
|
39
|
+
if [ -f "$REPO_DIR/mcp/dist/version.json" ]; then
|
|
40
|
+
installed="$(node -e 'try{process.stdout.write(require(process.argv[1]).version||"")}catch(e){}' "$REPO_DIR/mcp/dist/version.json" 2>/dev/null)"
|
|
41
|
+
fi
|
|
42
|
+
if [ -z "$installed" ] && [ -f "$REPO_DIR/package.json" ]; then
|
|
43
|
+
installed="$(node -e 'try{process.stdout.write(require(process.argv[1]).version||"")}catch(e){}' "$REPO_DIR/package.json" 2>/dev/null)"
|
|
44
|
+
fi
|
|
45
|
+
|
|
46
|
+
# --- resolve latest published version ---------------------------------------
|
|
47
|
+
latest="$(npm view social-autoposter version 2>/dev/null | tr -d '[:space:]')"
|
|
48
|
+
if [ -z "$latest" ]; then
|
|
49
|
+
log "skip: could not reach npm to check latest version (offline or registry error). installed=${installed:-unknown}"
|
|
50
|
+
exit 0
|
|
51
|
+
fi
|
|
52
|
+
|
|
53
|
+
if [ -n "$installed" ] && [ "$installed" = "$latest" ]; then
|
|
54
|
+
log "up to date: installed=$installed latest=$latest"
|
|
55
|
+
exit 0
|
|
56
|
+
fi
|
|
57
|
+
|
|
58
|
+
log "update available: installed=${installed:-unknown} latest=$latest — running npx social-autoposter@latest update"
|
|
59
|
+
if npx -y social-autoposter@latest update >>"$LOG" 2>&1; then
|
|
60
|
+
log "update OK -> $latest (takes effect on next MCP reconnect / next headless cycle)"
|
|
61
|
+
exit 0
|
|
62
|
+
else
|
|
63
|
+
rc=$?
|
|
64
|
+
log "update FAILED (exit $rc); staying on installed=${installed:-unknown}"
|
|
65
|
+
exit "$rc"
|
|
66
|
+
fi
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# stats-instagram.sh — Refresh engagement stats for Instagram posts.
|
|
3
|
+
#
|
|
4
|
+
# Mirrors the per-platform stats pattern used by stats-reddit.sh / stats-twitter.sh
|
|
5
|
+
# but is API-only (no browser): calls IG Graph API insights for each posts row
|
|
6
|
+
# with platform='instagram', updates upvotes/comments_count/views, and logs the
|
|
7
|
+
# run so it surfaces in the dashboard Jobs panel.
|
|
8
|
+
#
|
|
9
|
+
# Logs: skill/logs/stats-instagram-YYYY-MM-DD_HHMMSS.log
|
|
10
|
+
|
|
11
|
+
set -uo pipefail
|
|
12
|
+
|
|
13
|
+
REPO_DIR="$HOME/social-autoposter"
|
|
14
|
+
LOG_DIR="$REPO_DIR/skill/logs"
|
|
15
|
+
mkdir -p "$LOG_DIR"
|
|
16
|
+
LOG_FILE="$LOG_DIR/stats-instagram-$(date +%Y-%m-%d_%H%M%S).log"
|
|
17
|
+
|
|
18
|
+
log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
|
|
19
|
+
log "=== stats-instagram fire: $(date) ==="
|
|
20
|
+
|
|
21
|
+
RUN_START=$(date +%s)
|
|
22
|
+
|
|
23
|
+
# Lock — instagram-poster reuses this lane; stats and post must not race.
|
|
24
|
+
# shellcheck source=lock.sh
|
|
25
|
+
source "$REPO_DIR/skill/lock.sh"
|
|
26
|
+
acquire_lock instagram-poster 30
|
|
27
|
+
|
|
28
|
+
# Step 1: sync any newly-posted media_posts -> posts (idempotent).
|
|
29
|
+
log "step 1: sync_ig_to_posts"
|
|
30
|
+
if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/sync_ig_to_posts.py" --quiet >>"$LOG_FILE" 2>&1; then
|
|
31
|
+
log "sync_ig_to_posts failed (continuing to refresh existing rows)"
|
|
32
|
+
fi
|
|
33
|
+
|
|
34
|
+
# Step 2: refresh stats for all platform='instagram' rows.
|
|
35
|
+
log "step 2: update_instagram_stats"
|
|
36
|
+
OUTPUT_FILE="$(mktemp "${TMPDIR:-/tmp}/stats-instagram.XXXXXX" 2>/dev/null)" || OUTPUT_FILE="/tmp/stats-instagram-$$.out"  # scratch file for the updater's stdout (parsed for SUMMARY: below, then removed); mktemp gives an unpredictable, privately-created name instead of a guessable PID-based path in /tmp, with the old path kept as a best-effort fallback if mktemp fails
|
|
37
|
+
if ! /opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/update_instagram_stats.py" 2>>"$LOG_FILE" | tee -a "$LOG_FILE" >"$OUTPUT_FILE"; then
|
|
38
|
+
log "update_instagram_stats.py exited non-zero — logging run as failed"
|
|
39
|
+
CHECKED=0; UPDATED=0; NOT_FOUND=0; FAILED=0; VIEWS_REFRESHED=0
|
|
40
|
+
else
|
|
41
|
+
SUMMARY=$(grep '^SUMMARY:' "$OUTPUT_FILE" | tail -1)
|
|
42
|
+
CHECKED=$(echo "$SUMMARY" | sed -n 's/.*CHECKED=\([0-9]*\).*/\1/p'); CHECKED=${CHECKED:-0}
|
|
43
|
+
UPDATED=$(echo "$SUMMARY" | sed -n 's/.*UPDATED=\([0-9]*\).*/\1/p'); UPDATED=${UPDATED:-0}
|
|
44
|
+
NOT_FOUND=$(echo "$SUMMARY" | sed -n 's/.*NOT_FOUND=\([0-9]*\).*/\1/p'); NOT_FOUND=${NOT_FOUND:-0}
|
|
45
|
+
FAILED=$(echo "$SUMMARY" | sed -n 's/.*FAILED=\([0-9]*\).*/\1/p'); FAILED=${FAILED:-0}
|
|
46
|
+
VIEWS_REFRESHED=$(echo "$SUMMARY" | sed -n 's/.*VIEWS_REFRESHED=\([0-9]*\).*/\1/p'); VIEWS_REFRESHED=${VIEWS_REFRESHED:-0}
|
|
47
|
+
fi
|
|
48
|
+
rm -f "$OUTPUT_FILE"
|
|
49
|
+
|
|
50
|
+
RUN_ELAPSED=$(( $(date +%s) - RUN_START ))
|
|
51
|
+
|
|
52
|
+
log "logging run: checked=$CHECKED updated=$UPDATED not_found=$NOT_FOUND failed=$FAILED views_refreshed=$VIEWS_REFRESHED elapsed=${RUN_ELAPSED}s"
|
|
53
|
+
|
|
54
|
+
/opt/homebrew/bin/python3.11 "$REPO_DIR/scripts/log_run.py" \
|
|
55
|
+
--script "stats_instagram" \
|
|
56
|
+
--posted 0 \
|
|
57
|
+
--skipped 0 \
|
|
58
|
+
--failed "$FAILED" \
|
|
59
|
+
--replies-refreshed 0 \
|
|
60
|
+
--checked "$CHECKED" \
|
|
61
|
+
--updated "$UPDATED" \
|
|
62
|
+
--removed 0 \
|
|
63
|
+
--unavailable 0 \
|
|
64
|
+
--not-found "$NOT_FOUND" \
|
|
65
|
+
--scanned "$CHECKED" \
|
|
66
|
+
--changed "$UPDATED" \
|
|
67
|
+
--views-refreshed "$VIEWS_REFRESHED" \
|
|
68
|
+
--cost 0 \
|
|
69
|
+
--elapsed "$RUN_ELAPSED" >>"$LOG_FILE" 2>&1 || log "log_run.py failed"
|
|
70
|
+
|
|
71
|
+
log "=== stats-instagram done ==="
|
|
72
|
+
exit 0
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# stats-linkedin.sh — Unified LinkedIn stats refresh.
|
|
3
|
+
#
|
|
4
|
+
# Single pipeline that mirrors the Twitter logic shape: one source of truth
|
|
5
|
+
# (LinkedIn's /in/me/recent-activity/comments/ activity tab), one DB write
|
|
6
|
+
# path across all LinkedIn engagement rows. Replaces:
|
|
7
|
+
# - The deprecated skill/stats.sh Step 4 (which called the now-stubbed
|
|
8
|
+
# scripts/scrape_linkedin_stats_browser.py and silently no-op'd).
|
|
9
|
+
# - The standalone skill/stats-linkedin-comments.sh, which only updated
|
|
10
|
+
# the legacy `replies` table. This script originally kept calling that
|
|
11
|
+
# updater too so we didn't lose the ~173 replies-table rows (that writer has since been retired; see the History note below).
|
|
12
|
+
#
|
|
13
|
+
# What this does, in order:
|
|
14
|
+
# 1. Acquire the linkedin-browser lock (serializes against run-linkedin.sh
|
|
15
|
+
# / engage-linkedin.sh / dm-outreach-linkedin.sh / engage-dm-replies.sh).
|
|
16
|
+
# 2. Run scripts/scrape_linkedin_comment_stats.py ONCE. It CDP-attaches
|
|
17
|
+
# to the linkedin-harness Chrome on port 9556 (2026-05-26 migration:
|
|
18
|
+
# replaced the legacy ps-discovery of linkedin-agent MCP. The harness
|
|
19
|
+
# is multi-client safe so no kill+reopen, no Singleton fight. The
|
|
20
|
+
# LINKEDIN_CDP_URL env var exported by skill/lib/linkedin-backend.sh
|
|
21
|
+
# tells linkedin_browser.py to attach via CDP directly), opens a
|
|
22
|
+
# tab to /in/me/recent-activity/comments/, harvests per-comment
|
|
23
|
+
# impressions / reactions / replies into a single JSON feed.
|
|
24
|
+
# 3. Run scripts/update_linkedin_stats_from_feed.py — writes the feed
|
|
25
|
+
# into the `posts` table for rows whose `our_url` carries a
|
|
26
|
+
# `?commentUrn=` (the 97 pre-existing rows from reply_to_comment +
|
|
27
|
+
# the 225 rows migrated from the legacy `replies` table on
|
|
28
|
+
# 2026-05-11 + every new row posted 2026-05-11 onward after
|
|
29
|
+
# linkedin_api.py:comment_on_post was patched to embed it).
|
|
30
|
+
# 4. Release the browser lock; the updater is DB-only.
|
|
31
|
+
#
|
|
32
|
+
# History note (2026-05-11): there used to be a second writer that wrote
|
|
33
|
+
# the same feed into the legacy `replies` table (~257 LinkedIn rows). On
|
|
34
|
+
# 2026-05-11 those rows were migrated into `posts` (Twitter-parity) and
|
|
35
|
+
# the source rows marked status='migrated'. The replies-table writer
|
|
36
|
+
# (scripts/update_linkedin_comment_stats_from_feed.py) and its standalone
|
|
37
|
+
# entrypoint (skill/stats-linkedin-comments.sh) were retired in the same
|
|
38
|
+
# pass. If you see references to them anywhere, they are stale and
|
|
39
|
+
# should be removed.
|
|
40
|
+
#
|
|
41
|
+
# Bot-detection prevention (carries over the carve-out from
|
|
42
|
+
# stats-linkedin-comments.sh; do NOT loosen):
|
|
43
|
+
# - ONE page.goto per fire to /in/me/recent-activity/comments/.
|
|
44
|
+
# - ONE page.evaluate; scroll + harvest happen inside the same JS run.
|
|
45
|
+
# - No clicks, no permalink hops, no "Show more", no Voyager API.
|
|
46
|
+
# - SESSION_INVALID detection: redirect to /login or /checkpoint -> stop.
|
|
47
|
+
#
|
|
48
|
+
# Cadence: every 4-6h. LinkedIn updates impressions in near real time but
|
|
49
|
+
# per-fire fingerprint risk is non-zero; do not run hotter.
|
|
50
|
+
|
|
51
|
+
set -euo pipefail
|
|
52
|
+
|
|
53
|
+
# LinkedIn killswitch (2026-05-27): refuse to run if a prior fire detected
|
|
54
|
+
# session compromise (http_999, authwall, throttle, li_at cleared).
|
|
55
|
+
# State: ~/.claude/social-autoposter/linkedin.killswitch
|
|
56
|
+
# Clear: python3 ~/social-autoposter/scripts/linkedin_killswitch.py clear
|
|
57
|
+
if [ -f "$HOME/.claude/social-autoposter/linkedin.killswitch" ]; then
|
|
58
|
+
echo "[$(date +%H:%M:%S)] LINKEDIN_KILLSWITCH active. Aborting LinkedIn pipeline."
|
|
59
|
+
echo " Re-auth LinkedIn in harness Chrome, then: python3 ~/social-autoposter/scripts/linkedin_killswitch.py clear"
|
|
60
|
+
exit 0
|
|
61
|
+
fi
|
|
62
|
+
|
|
63
|
+
source "$(dirname "$0")/lock.sh"
|
|
64
|
+
# 2026-05-26 harness migration: linkedin-backend.sh exports LINKEDIN_CDP_URL
|
|
65
|
+
# (http://127.0.0.1:9556) and exposes ensure_linkedin_browser_for_backend
|
|
66
|
+
# which probes + launches the linkedin-harness Chrome idempotently. The
|
|
67
|
+
# scraper picks up LINKEDIN_CDP_URL automatically via linkedin_browser.py's
|
|
68
|
+
# harness-cdp fast-path.
|
|
69
|
+
source "$(dirname "$0")/lib/linkedin-backend.sh"
|
|
70
|
+
|
|
71
|
+
# shellcheck source=/dev/null
|
|
72
|
+
[ -f "$HOME/social-autoposter/.env" ] && source "$HOME/social-autoposter/.env"
|
|
73
|
+
|
|
74
|
+
REPO_DIR="$HOME/social-autoposter"
|
|
75
|
+
LOG_DIR="$REPO_DIR/skill/logs"
|
|
76
|
+
PYTHON_BIN="/opt/homebrew/bin/python3"
|
|
77
|
+
# /usr/bin/python3 is the only interpreter with playwright installed; this
|
|
78
|
+
# matches engage-dm-replies.sh's call to linkedin_browser.py. DB scripts
|
|
79
|
+
# stay on homebrew python where psycopg2 is installed.
|
|
80
|
+
SCRAPER_PYTHON_BIN="/usr/bin/python3"
|
|
81
|
+
|
|
82
|
+
# Tunables.
|
|
83
|
+
MAX_SCROLLS=400 # 2026-05-28 set to 400 per user direction; natural stagnant>=8 bail should fire well before this (~tick 150). Safety ceiling, not target. Previous: 300 (auto-commit) <- 1000 (runaway 2026-05-27).
|
|
84
|
+
SCRAPER_TIMEOUT_SEC=900 # 15min outer gtimeout. Inner JS deadline now defaults to 10min via S4L_SCRAPER_DEADLINE_MS; the 15min outer is a 5min margin for cdp_attach + page.goto + the JS deadline + finalize().
|
|
85
|
+
|
|
86
|
+
mkdir -p "$LOG_DIR"
|
|
87
|
+
LOG_FILE="$LOG_DIR/stats-linkedin-$(date +%Y-%m-%d_%H%M%S).log"
|
|
88
|
+
log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG_FILE"; }
|
|
89
|
+
|
|
90
|
+
RUN_START=$(date +%s)
|
|
91
|
+
log "=== LinkedIn Stats Run (unified): $(date) ==="
|
|
92
|
+
log "mode: python (no LLM); MAX_SCROLLS=$MAX_SCROLLS; timeout=${SCRAPER_TIMEOUT_SEC}s"
|
|
93
|
+
|
|
94
|
+
# Coverage hint. Purely informational and never blocks the run: any failure of
# the helper below collapses to the placeholder "posts=?".
# The count is read through the s4l.ai HTTP API (no DATABASE_URL needed); the
# linkedin-engagement-comments GET returns every addressable row, so counting
# the rows is all that is required.
COVERAGE=$(
  "$PYTHON_BIN" -c "
import sys
sys.path.insert(0, '$REPO_DIR/scripts')
from http_api import api_get

payload = api_get('/api/v1/linkedin-engagement-comments')
data = payload.get('data') or {}
rows = data.get('rows') or []
print(f'posts={len(rows)}')
" 2>/dev/null || echo "posts=?"
)
log "Active LinkedIn comments addressable by this feed: $COVERAGE"
|
|
105
|
+
|
|
106
|
+
# Scraper output that the updater consumes; kept under the log dir so the
# retention sweep at the end of the script can age it out.
FEED_JSON="$LOG_DIR/stats-linkedin-feed-$(date +%Y%m%d_%H%M%S).json"

# Scratch files for the updater summary and the captured scraper output.
#
# mktemp creates the file it names. Appending ".json" to its output (the
# previous form) pointed the variable at a *different*, never-reserved path and
# orphaned the file mktemp had just created on every run. Rename the reserved
# file instead so the ".json" path is the one that exists (and is the one the
# later `rm -f` cleanup removes).
POSTS_SUMMARY_TMP=$(mktemp -t fazm-li-posts-summary.XXXXXX)
POSTS_SUMMARY_JSON="$POSTS_SUMMARY_TMP.json"
mv -f "$POSTS_SUMMARY_TMP" "$POSTS_SUMMARY_JSON"

SCRAPER_STDOUT_TMP=$(mktemp -t fazm-li-scrape.XXXXXX)
SCRAPER_STDOUT="$SCRAPER_STDOUT_TMP.json"
mv -f "$SCRAPER_STDOUT_TMP" "$SCRAPER_STDOUT"
|
|
109
|
+
|
|
110
|
+
# Forensic-bundle directory.
#
# On any non-ok return path the scraper drops screenshots, html, cookies,
# console.jsonl / navigation.jsonl / network.jsonl and a Python traceback in
# here, tar.gz's the directory and announces the tarball on stderr as
# `[scrape_linkedin] debug_bundle=<tarball>`. That marker is grepped out of the
# captured scraper output further down.
#
# For session_invalid / captcha_or_checkpoint specifically, the tarball is also
# copied to skill/logs/linkedin-debug-failures/. That subdir is NOT touched by
# the 14-day retention sweep at the end of this script: it is a permanent
# archive so the next failure can be diff'd byte-for-byte against the last
# known good/bad bundle.
DEBUG_BUNDLE_BASE="${LOG_DIR}/linkedin-debug"
DEBUG_BUNDLE_DIR="${DEBUG_BUNDLE_BASE}/$(date +%Y%m%d_%H%M%S)"
DEBUG_FAILURES_DIR="${LOG_DIR}/linkedin-debug-failures"
mkdir -p "$DEBUG_BUNDLE_BASE" "$DEBUG_FAILURES_DIR"
|
|
125
|
+
|
|
126
|
+
# 1. Acquire the linkedin-browser lock.
#
#    We no longer launch a second Chrome, but the lock still matters: two CDP
#    clients hammering the same DOM corrupt each other's evaluate() calls.
#
#    ensure_browser_healthy "linkedin" is DELIBERATELY not called here. That
#    helper SIGKILLs the linkedin-agent MCP and clears Singleton lockfiles so a
#    second Chrome can launch on the same profile. Since the 2026-05-26 harness
#    cutover, scrape_linkedin_comment_stats.py CDP-attaches to the
#    linkedin-harness Chrome (port 9556), which is multi-client safe.
acquire_lock "linkedin-browser" 1800

# Probe the harness Chrome and launch it if it is down (idempotent). Calling
# this while holding the linkedin-browser lock is safe: harness CDP supports
# concurrent clients on the same profile, so there is no SingletonLock fight.
ensure_linkedin_browser_for_backend
|
|
141
|
+
|
|
142
|
+
# 2. Run the headed-Chromium scraper (single scrape, shared between writers).
log "Launching headed Chromium scraper..."
log "Debug bundle dir (pre-tar): $DEBUG_BUNDLE_DIR"

# errexit is suspended around the scraper so a non-zero exit is captured in
# SCRAPER_RC and handled below instead of killing the script. stdout and stderr
# are both captured into $SCRAPER_STDOUT.
SCRAPER_RC=0
set +e
SOCIAL_AUTOPOSTER_LINKEDIN_COMMENT_STATS=1 \
  /opt/homebrew/bin/gtimeout "$SCRAPER_TIMEOUT_SEC" \
    "$SCRAPER_PYTHON_BIN" "$REPO_DIR/scripts/scrape_linkedin_comment_stats.py" \
      --out "$FEED_JSON" \
      --max-scrolls "$MAX_SCROLLS" \
      --debug-dir "$DEBUG_BUNDLE_DIR" \
  > "$SCRAPER_STDOUT" 2>&1
SCRAPER_RC=$?
set -e
|
|
156
|
+
|
|
157
|
+
# The browser lock is released unconditionally; the updaters that follow are
# DB-only.
release_lock "linkedin-browser"

# 2026-05-26 harness migration: linkedin_browser._acquire_browser_lock still
# writes the linkedin-agent JSON lockfile to serialize concurrent Python
# invocations. Remove it on the way out so stale entries do not accumulate.
rm -f "$HOME/.claude/linkedin-agent-lock.json"

# Replay the captured scraper output to the console and into the run log.
tee -a "$LOG_FILE" < "$SCRAPER_STDOUT"
|
|
167
|
+
|
|
168
|
+
# Surface the debug-bundle tarball path. The scraper writes a single
# `[scrape_linkedin] debug_bundle=<path>` line to stderr right before exit
# (stderr is captured into $SCRAPER_STDOUT together with stdout); pull it back
# out so the path is visible in the orchestrator log without unpacking
# anything.
#
# `|| true` keeps a missing marker from being fatal: grep exits 1 when nothing
# matches, which aborts the script when errexit and pipefail are both active
# and would make the "<missing>" branch below unreachable. This mirrors the
# BROWSER_MODE_LINE capture.
DEBUG_TARBALL=$(
  grep -m1 -E '^\[scrape_linkedin\] debug_bundle=' "$SCRAPER_STDOUT" \
    | sed -E 's/^\[scrape_linkedin\] debug_bundle=//' \
    || true
)
if [ -n "$DEBUG_TARBALL" ] && [ -f "$DEBUG_TARBALL" ]; then
  log "Debug bundle: $DEBUG_TARBALL"
else
  log "Debug bundle: <missing — scraper did not emit debug_bundle marker>"
fi
|
|
178
|
+
|
|
179
|
+
# Echo the linkedin_browser mode line as well: after a failure it is the #1
# signal for answering "did we cdp_attach or cold_launch?".
BROWSER_MODE_LINE=$(grep -m1 -E '^\[linkedin_browser\] mode=' "$SCRAPER_STDOUT" || true)
if [ -z "$BROWSER_MODE_LINE" ]; then
  log "Browser mode: <missing — _connect_to_running_or_launch never logged>"
else
  log "Browser mode: $BROWSER_MODE_LINE"
fi
|
|
187
|
+
|
|
188
|
+
# Scraper failure path: classify the error, archive the forensic bundle for
# session/captcha failures, and abort the run unless a feed file was still
# produced.
if [ "$SCRAPER_RC" -ne 0 ]; then
  log "ERROR: scraper exited rc=$SCRAPER_RC"

  # Extract the scraper's `error` field from the captured output.
  #
  # $SCRAPER_STDOUT holds stdout AND stderr (the scraper is run with 2>&1), so
  # the JSON payload is normally surrounded by marker lines such as
  # `[scrape_linkedin] debug_bundle=...`. Parsing the whole file as one JSON
  # document therefore fails whenever any such line is present. Instead, decode
  # a JSON object starting at each line that begins with "{" and keep the last
  # top-level object found; this handles both single-line and pretty-printed
  # payloads. The path is passed as argv rather than spliced into the Python
  # source so unusual characters in it cannot break the snippet.
  #
  # Output contract (unchanged): the error string, "unknown" when the object
  # has no `error` key, "parse_failed" when no JSON object can be decoded, and
  # "unknown" if the interpreter itself fails.
  SCRAPER_ERROR=$("$PYTHON_BIN" -c '
import json
import sys

try:
    with open(sys.argv[1], encoding="utf-8", errors="replace") as fh:
        text = fh.read()
    decoder = json.JSONDecoder()
    payload = None
    offset = 0    # start offset of the current line within text
    consumed = 0  # end offset of the last successfully decoded object
    for line in text.splitlines(keepends=True):
        start = offset
        offset += len(line)
        # Skip lines inside an already-decoded object and non-JSON noise.
        if start < consumed or not line.startswith("{"):
            continue
        try:
            obj, consumed = decoder.raw_decode(text, start)
        except ValueError:
            continue
        if isinstance(obj, dict):
            payload = obj
    if payload is None:
        print("parse_failed")
    else:
        print(payload.get("error", "unknown"))
except Exception:
    print("parse_failed")
' "$SCRAPER_STDOUT" 2>/dev/null || echo "unknown")
  log "scraper error code: $SCRAPER_ERROR"

  # Permanent archive of session_invalid / captcha tarballs. We never want to
  # wake up to another 14-line "session_invalid" log file with no way to
  # forensically inspect the DOM that triggered it. Keep these forever (or
  # until the user manually cleans the dir).
  if [ "$SCRAPER_ERROR" = "session_invalid" ] \
     || [ "$SCRAPER_ERROR" = "captcha_or_checkpoint" ]; then
    log "SESSION_INVALID — abort run, do not retry."
    if [ -n "$DEBUG_TARBALL" ] && [ -f "$DEBUG_TARBALL" ]; then
      # Tag the archived copy with the error code so the failures dir is
      # self-describing.
      FAILURE_COPY="$DEBUG_FAILURES_DIR/$(basename "$DEBUG_TARBALL" .tar.gz)__${SCRAPER_ERROR}.tar.gz"
      if cp -p "$DEBUG_TARBALL" "$FAILURE_COPY" 2>/dev/null; then
        log "Archived failure bundle: $FAILURE_COPY"
      else
        log "WARN: failed to archive failure bundle to $FAILURE_COPY"
      fi
    else
      log "WARN: no debug tarball available to archive for $SCRAPER_ERROR"
    fi
  fi

  # Without a non-empty feed file there is nothing for the updater to apply:
  # drop the scratch files, record a failed run (best-effort) and stop.
  if [ ! -s "$FEED_JSON" ]; then
    log "No feed JSON produced; skipping updater."
    rm -f "$SCRAPER_STDOUT" "$POSTS_SUMMARY_JSON"
    RUN_ELAPSED=$(( $(date +%s) - RUN_START ))
    "$PYTHON_BIN" "$REPO_DIR/scripts/log_run.py" \
      --script "stats_linkedin" \
      --posted 0 --skipped 0 --failed 1 \
      --cost "0.0000" --elapsed "$RUN_ELAPSED" \
      2>/dev/null || true
    log "=== LinkedIn stats failed: $(date) ==="
    exit 1
  fi

  # A feed file was written before the scraper failed; still apply it.
  log "Feed JSON exists despite rc=$SCRAPER_RC; running updater anyway."
fi
|
|
231
|
+
|
|
232
|
+
# 3. Apply to `posts` (Twitter-parity table; sole stats target).
#
#    The updater's output is piped through tee, so `$?` after the pipeline is
#    not reliably the updater's exit status (without pipefail it is tee's, and
#    updater failures would go unreported). Read the updater's own status from
#    PIPESTATUS[0] instead. errexit is suspended around the pipeline so a
#    failing updater stays a warning rather than aborting the run, as before.
log "Writer: posts table..."
set +e
"$PYTHON_BIN" "$REPO_DIR/scripts/update_linkedin_stats_from_feed.py" \
  --from-json "$FEED_JSON" \
  --summary "$POSTS_SUMMARY_JSON" \
  2>&1 | tee -a "$LOG_FILE"
UPDATER_RC=${PIPESTATUS[0]}
set -e
if [ "$UPDATER_RC" -ne 0 ]; then
  log "WARNING: posts updater exited with code $UPDATER_RC"
fi
|
|
239
|
+
|
|
240
|
+
# 4. Surface counters.

# _summary_field KEY
# Print the value stored under KEY in the updater's summary JSON, or 0 when the
# key is absent or the file cannot be read/parsed.
_summary_field() {
  "$PYTHON_BIN" -c "import json; print(json.load(open('$POSTS_SUMMARY_JSON')).get('$1', 0))" 2>/dev/null || echo 0
}

# Both counters stay at 0 when the updater produced no (or an empty) summary.
REFRESHED_POSTS=0
NOT_FOUND=0
if [ -s "$POSTS_SUMMARY_JSON" ]; then
  REFRESHED_POSTS=$(_summary_field refreshed)
  NOT_FOUND=$(_summary_field not_found)
fi
log "Comment stats refresh: posts=$REFRESHED_POSTS total=$REFRESHED_POSTS unmatched=$NOT_FOUND"
|
|
248
|
+
|
|
249
|
+
# 5. Log run to persistent monitor. Best-effort: stderr is discarded and a
#    failure of log_run.py does not fail the run.
RUN_ELAPSED=$(( $(date +%s) - RUN_START ))
"$PYTHON_BIN" "$REPO_DIR/scripts/log_run.py" \
  --script "stats_linkedin" \
  --posted "$REFRESHED_POSTS" \
  --skipped 0 \
  --failed 0 \
  --cost "0.0000" \
  --elapsed "$RUN_ELAPSED" \
  2>/dev/null || true
|
|
255
|
+
|
|
256
|
+
# Cleanup: drop this run's scratch files, then age out old artifacts
# (run logs after 14 days, feed JSON snapshots after 7). Sweep errors are
# ignored.
rm -f "$POSTS_SUMMARY_JSON" "$SCRAPER_STDOUT"
find "$LOG_DIR" -name "stats-linkedin-*.log" -mtime +14 -delete \
  2>/dev/null || true
find "$LOG_DIR" -name "stats-linkedin-feed-*.json" -mtime +7 -delete \
  2>/dev/null || true
|
|
260
|
+
|
|
261
|
+
# Debug-bundle retention. Three layers:
#   - linkedin-debug/<ts>/         per-fire unpacked dirs, kept 14d
#   - linkedin-debug/<ts>.tar.gz   per-fire tarballs, kept 14d
#   - linkedin-debug-failures/     permanent archive of session_invalid /
#                                  captcha tarballs; NEVER swept here.
# Tune the +14 values if disk pressure becomes an issue; do NOT add the
# failures dir to the find sweep without explicit user instruction.
find "$DEBUG_BUNDLE_BASE" -maxdepth 1 -type d -name "20*" -mtime +14 \
  -exec rm -rf {} + 2>/dev/null || true
find "$DEBUG_BUNDLE_BASE" -maxdepth 1 -type f -name "20*.tar.gz" -mtime +14 \
  -delete 2>/dev/null || true

log "=== LinkedIn stats complete: $(date) ==="
|