@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
package/skill/lock.sh
ADDED
|
@@ -0,0 +1,647 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Portable file locking (no flock needed)
|
|
3
|
+
# Usage: source lock.sh; acquire_lock "platform-name" [timeout_seconds]
|
|
4
|
+
#
|
|
5
|
+
# Multiple acquire_lock calls stack: all held locks are cleaned up on exit by
|
|
6
|
+
# a single trap. Acquire platform-browser locks BEFORE pipeline-specific locks
|
|
7
|
+
# to avoid deadlock across pipelines that share a browser profile.
|
|
8
|
+
|
|
9
|
+
# shellcheck source=lib/platform.sh
|
|
10
|
+
source "$(dirname "${BASH_SOURCE[0]}")/lib/platform.sh"
|
|
11
|
+
|
|
12
|
+
# --- Lock-event instrumentation (added 2026-06-16) ---------------------------
# Single shared, DATED log of every lock lifecycle event from EVERY pipeline,
# so cross-pipeline contention is reconstructable from ONE file instead of
# merging undated per-pipeline launchd stderr streams (the exact thing that
# made the 2026-06-15 twitter-browser double-hold so hard to prove). Purely
# additive: best-effort, never fails the caller, changes NO lock behavior.
# The high-value field is `owner=self|OTHER` at every deletion point: if we
# ever delete a lock dir whose recorded pid is NOT ours, that line is the
# red-handed proof of an ownership-blind rm.
#
# The log path can be overridden by exporting _SA_LOCK_EVENT_LOG before this
# file is sourced; the default lives in logs/ next to this script.
_SA_LOCK_EVENT_LOG="${_SA_LOCK_EVENT_LOG:-$(dirname "${BASH_SOURCE[0]}")/logs/lock-events.log}"
mkdir -p "$(dirname "$_SA_LOCK_EVENT_LOG")" 2>/dev/null || true

# Append one timestamped event line to $_SA_LOCK_EVENT_LOG.
#
# Usage:   _sa_lock_event <event> <lock_name> [extra k=v ...]
# Format:  "<YYYY-MM-DD HH:MM:SS> pid=<$$> event=<event> lock=<lock_name> <extras>"
# Returns: always 0; a write failure is swallowed so logging can never fail
#          the caller.
_sa_lock_event() {
  # "${*:3}" joins the extras on the FIRST character of IFS. Pin IFS to a
  # space for this function only, so a caller running with a non-default IFS
  # (e.g. IFS=$'\n\t') still produces exactly one space-separated log line.
  local IFS=' '
  printf '%s pid=%s event=%s lock=%s %s\n' \
    "$(date '+%Y-%m-%d %H:%M:%S')" "$$" "$1" "$2" "${*:3}" \
    >> "$_SA_LOCK_EVENT_LOG" 2>/dev/null || true
}
|
|
29
|
+
_sa_lock_owner_tag() {
  # Print an ownership fragment for lock dir $1, suitable for appending to a
  # lock-event line (no trailing newline):
  #   "owner=self on_disk=<pid>"        the pid recorded in $1/pid is this shell
  #   "owner=OTHER on_disk=<pid|none>"  anything else; "none" when the pid file
  #                                     is missing or empty
  # owner=OTHER at a deletion point means we are about to remove a lock that a
  # DIFFERENT holder recorded (the double-hold smoking gun).
  local pid_file="$1/pid"
  local recorded_pid=""
  local verdict="OTHER"

  if [ -f "$pid_file" ]; then
    recorded_pid="$(head -1 "$pid_file" 2>/dev/null || true)"
  fi
  if [ "$recorded_pid" = "$$" ]; then
    verdict="self"
  fi

  # When verdict is "self" recorded_pid is necessarily non-empty, so the
  # ":-none" fallback can only ever show up on the OTHER branch.
  printf 'owner=%s on_disk=%s' "$verdict" "${recorded_pid:-none}"
}
|
|
43
|
+
# Ownership guard (added 2026-06-17).
#
# Usage:   _sa_we_own_lock <lock_dir>
# Returns: 0 only when the first line of <lock_dir>/pid equals this shell's
#          pid ($$); non-zero when the pid file is missing, empty, or names
#          another process.
#
# Every rm of a lock dir is gated on this so a lock currently held by a peer
# is never deleted. It was proven necessary by 17h of lock-events.log, which
# caught 32 `owner=OTHER` deletions on twitter-browser (trap + release blindly
# rm-ing a live peer's lock) that cascaded into real double-holds (two
# pipelines on one Chrome).
#
# Failure mode is safe: if this ever wrongly reports "not ours" for OUR OWN
# lock (e.g. a transient pid-file read miss), we merely skip our own cleanup
# and the acquire-side kill -0 stale path reclaims the lock within one cycle.
# It never deadlocks.
_sa_we_own_lock() {
  local pid_file="$1/pid"
  local recorded_pid

  # No pid file means nobody has recorded ownership, so it is not ours.
  [ -f "$pid_file" ] || return 1

  recorded_pid="$(head -1 "$pid_file" 2>/dev/null || true)"
  [ "$recorded_pid" = "$$" ]
}
|
|
56
|
+
|
|
57
|
+
# Stack of currently-held lock directories AND outstanding queue tickets,
# both cleaned up on exit. Declared at source time so they survive across
# acquire_lock calls. The guard skips re-initialisation when lock.sh is
# sourced again while _SA_LOCK_DIRS already has entries, so registered locks
# are not forgotten.
if [ -z "${_SA_LOCK_DIRS+x}" ]; then
  # Plain assignments (not `declare -a`): `declare` inside a function creates
  # LOCAL variables, which would silently drop the stacks if a caller sources
  # this file from within a function. Plain assignment is global in every
  # context and works on bash 3.2.
  _SA_LOCK_DIRS=()
  _SA_LOCK_TICKETS=()

  # Print the pids (one per line) of top-level Chrome processes whose command
  # line references user-data-dir=$1. Renderer/GPU/etc. subprocesses
  # (--type=...) and the awk filter itself are excluded. Never fails.
  _sa_profile_chrome_pids() {
    ps -A -o pid=,command= 2>/dev/null | awk -v p="user-data-dir=$1" '
      index($0,p)>0 && index($0,"--type=")==0 && index($0,"awk ")==0 {print $1}' || true
  }

  # Exit/signal handler: release every lock and queue ticket this shell
  # registered via acquire_lock.
  #   1. For each browser lock WE STILL OWN, tear down Chrome on the matching
  #      profile so the next holder starts from a clean profile.
  #   2. Remove each lock dir we still own (ownership-gated).
  #   3. Remove any outstanding queue tickets.
  # Takes no arguments; reads _SA_LOCK_DIRS and _SA_LOCK_TICKETS.
  _sa_release_locks() {
    local d t lock_name plat profile_dir chrome_pids still_alive _lname

    # Browser-profile cleanup BEFORE releasing locks (added 2026-05-13).
    # Before this, dm-outreach-twitter would exit, _sa_release_locks would
    # rm the twitter-browser lock dir, the next pipeline (engage-twitter,
    # engage-dm-replies-twitter) would acquire the now-free shell lock,
    # then find the Chrome profile's SingletonLock STILL held by the
    # previous pipeline's Chrome (which hadn't fully torn down yet) and
    # crash with "chromium profile locked by another process; waited 45s".
    # Observed 2026-05-13 14:06 (engage-twitter), 14:13 (engage-dm-replies-twitter,
    # spawned dozens of Chrome respawns at 3-5s cadence).
    #
    # Fix: for any held lock that LOOKS like a browser lock (suffix -browser),
    # kill any top-level Chrome on the corresponding profile BEFORE releasing
    # the shell lock, regardless of ppid (unlike the post-acquire sweep in
    # acquire_lock, which restricts to ppid==1 to avoid clobbering peers).
    #
    # This is only safe while we actually hold the lock. A dir can still be
    # listed in _SA_LOCK_DIRS after a peer has re-acquired it (the same case
    # the ownership guard on the rm below exists for), and this handler can
    # run more than once (signal, then EXIT). So the teardown is gated on
    # _sa_we_own_lock exactly like the rm; otherwise we would kill a live
    # peer's Chrome and delete its Singleton files.
    for d in ${_SA_LOCK_DIRS[@]+"${_SA_LOCK_DIRS[@]}"}; do
      lock_name="${d##*/}"
      lock_name="${lock_name#social-autoposter-}"
      lock_name="${lock_name%.lock}"
      case "$lock_name" in
        twitter-browser|reddit-browser|linkedin-browser)
          if ! _sa_we_own_lock "$d"; then
            _sa_lock_event browser_cleanup_skipped "$lock_name" "$(_sa_lock_owner_tag "$d")"
            continue
          fi
          plat="${lock_name%-browser}"
          profile_dir="$HOME/.claude/browser-profiles/$plat"
          # Top-level Chromes on this profile (skip --type= subprocesses).
          chrome_pids="$(_sa_profile_chrome_pids "$profile_dir")"
          if [ -n "$chrome_pids" ]; then
            # SIGTERM first for graceful close, brief pause, then SIGKILL stragglers.
            echo "$chrome_pids" | xargs kill -TERM 2>/dev/null || true
            sleep 1
            still_alive="$(_sa_profile_chrome_pids "$profile_dir")"
            if [ -n "$still_alive" ]; then
              echo "$still_alive" | xargs kill -KILL 2>/dev/null || true
            fi
            # Also kill matching MCP wrappers so they can't relaunch Chrome.
            pkill -KILL -f "${plat}-agent.json" 2>/dev/null || true
            # Clear singletons so the next launch_persistent_context starts clean.
            rm -f "$profile_dir/SingletonLock" "$profile_dir/SingletonCookie" "$profile_dir/SingletonSocket" 2>/dev/null || true
          fi
          ;;
      esac
    done

    # Safe for bash 3.2: ${arr[@]+"${arr[@]}"} expands to nothing when arr is
    # unset or empty, avoiding the "unbound variable" error with set -u.
    # The earlier if+for guard was insufficient because bash 3.2 treats even
    # ${#unset_arr[@]} as an "unbound variable" error in some exit-trap contexts.
    for d in ${_SA_LOCK_DIRS[@]+"${_SA_LOCK_DIRS[@]}"}; do
      _lname="${d##*/}"
      _lname="${_lname#social-autoposter-}"
      _lname="${_lname%.lock}"
      # Ownership guard: only delete the dir if WE still hold it. A peer may have
      # legitimately re-acquired it after our mid-cycle release; deleting it here
      # is defect "owner=OTHER" (wipes a live peer's lock -> double-hold).
      if _sa_we_own_lock "$d"; then
        _sa_lock_event trap_rm "$_lname" "$(_sa_lock_owner_tag "$d")"
        echo "[lock] trap-released $_lname pid=$$ at $(date +%H:%M:%S)" >&2
        rm -rf -- "$d"
      else
        _sa_lock_event trap_rm_skipped "$_lname" "$(_sa_lock_owner_tag "$d")"
        echo "[lock] trap-release SKIPPED $_lname pid=$$ (not owner) at $(date +%H:%M:%S)" >&2
      fi
    done

    # Queue tickets are per-process files, so no ownership check is needed.
    for t in ${_SA_LOCK_TICKETS[@]+"${_SA_LOCK_TICKETS[@]}"}; do
      rm -f -- "$t"
    done
  }

  # NOTE(review): for INT/TERM/HUP the handler releases the locks but does not
  # exit, so bash resumes the script afterwards WITHOUT its locks (and EXIT
  # runs the handler a second time later). Left unchanged here because callers
  # may rely on it or install their own traps; confirm whether the signal
  # traps should re-raise/exit after cleanup.
  trap _sa_release_locks EXIT INT TERM HUP
fi
|
|
137
|
+
|
|
138
|
+
acquire_lock() {
|
|
139
|
+
local name="$1"
|
|
140
|
+
local timeout="${2:-3600}"
|
|
141
|
+
local lock_dir="/tmp/social-autoposter-${name}.lock"
|
|
142
|
+
local queue_dir="${lock_dir}.queue"
|
|
143
|
+
local waited=0
|
|
144
|
+
# logged_holder: per-acquire flag so we surface "who is holding this lock"
|
|
145
|
+
# exactly once when we start waiting, not on every 2s poll. Added 2026-05-26
|
|
146
|
+
# so the operator can answer "why did Twitter cycle 90245 wait 60s for the
|
|
147
|
+
# browser lock?" by grepping the cycle log for `[lock] waiting for
|
|
148
|
+
# twitter-browser` instead of cross-correlating launchd start times.
|
|
149
|
+
local logged_holder=false
|
|
150
|
+
|
|
151
|
+
# Platform-browser locks still get the orphan-Chrome sweep on acquire (after
|
|
152
|
+
# the lock is taken). Peers do NOT force-kill each other: a long-running
|
|
153
|
+
# holder is the watchdog's responsibility (per-script caps in
|
|
154
|
+
# scripts/watchdog_hung_runs.py), not a peer pipeline's. Prior versions
|
|
155
|
+
# killed the holder's whole process group at lock_age > 600s and clobbered
|
|
156
|
+
# unrelated steps (e.g. stats.sh Step 2 was SIGTERMed mid-API-call by a
|
|
157
|
+
# waiting dm-replies-reddit on 2026-04-25).
|
|
158
|
+
local is_browser_lock=false
|
|
159
|
+
case "$name" in
|
|
160
|
+
reddit-browser|linkedin-browser|twitter-browser) is_browser_lock=true ;;
|
|
161
|
+
esac
|
|
162
|
+
|
|
163
|
+
# FIFO ticket queue (added 2026-05-01). Without this, mkdir-race acquisition
|
|
164
|
+
# starved long-waiters under parallel cycles: a fresh cycle entering Phase 1
|
|
165
|
+
# would race-win the lock the moment the prior holder released, ahead of a
|
|
166
|
+
# peer's Phase 2b-post that had been waiting 5+ min. Observed live: cycle
|
|
167
|
+
# 90205's Phase 2b-post waited 8+ min while three newer cycles cut in line
|
|
168
|
+
# for their own Phase 1 scrapes.
|
|
169
|
+
#
|
|
170
|
+
# Mechanism: each waiter writes a `<ns_timestamp>-<pid>` ticket into
|
|
171
|
+
# `${lock_dir}.queue/`. ls-sort gives FIFO order. Only the head-of-queue
|
|
172
|
+
# waiter races to mkdir the lock_dir, so post-release acquisition is
|
|
173
|
+
# deterministic by arrival time. Ticket is removed once the lock is held
|
|
174
|
+
# (and on EXIT trap as a safety net for SIGKILLed waiters).
|
|
175
|
+
mkdir -p "$queue_dir"
|
|
176
|
+
local ticket
|
|
177
|
+
ticket="$(python3 -c 'import time; print(time.time_ns())' 2>/dev/null)-$$"
|
|
178
|
+
if [ -z "${ticket%-$$}" ]; then
|
|
179
|
+
# python3 unavailable; fall back to seconds + microsecond approximation.
|
|
180
|
+
# PID disambiguates same-second collisions; loses sub-second FIFO ordering
|
|
181
|
+
# but maintains correctness (waiters in same second arbitrate by PID).
|
|
182
|
+
ticket="$(date +%s)000000000-$$"
|
|
183
|
+
fi
|
|
184
|
+
local ticket_file="$queue_dir/$ticket"
|
|
185
|
+
echo $$ > "$ticket_file"
|
|
186
|
+
_SA_LOCK_TICKETS+=("$ticket_file")
|
|
187
|
+
|
|
188
|
+
while true; do
|
|
189
|
+
# GC stale tickets: any ticket whose owning PID is dead. Without this a
|
|
190
|
+
# SIGKILLed waiter (no trap fired) would block all newer waiters forever
|
|
191
|
+
# because its ticket would always be oldest.
|
|
192
|
+
local t tpid
|
|
193
|
+
for t in $(ls -1 "$queue_dir" 2>/dev/null); do
|
|
194
|
+
tpid=$(cat "$queue_dir/$t" 2>/dev/null || echo "")
|
|
195
|
+
if [ -n "$tpid" ] && ! kill -0 "$tpid" 2>/dev/null; then
|
|
196
|
+
rm -f "$queue_dir/$t"
|
|
197
|
+
fi
|
|
198
|
+
done
|
|
199
|
+
|
|
200
|
+
# Check our position. ls -1 + sort gives lexicographic (== numeric) order
|
|
201
|
+
# over fixed-width nanosecond timestamps, so head is the oldest waiter.
|
|
202
|
+
local oldest
|
|
203
|
+
oldest=$(ls -1 "$queue_dir" 2>/dev/null | sort | head -1)
|
|
204
|
+
|
|
205
|
+
if [ "$oldest" = "$ticket" ]; then
|
|
206
|
+
# We are the head. Try to acquire the lock.
|
|
207
|
+
if mkdir "$lock_dir" 2>/dev/null; then
|
|
208
|
+
# Won the lock. Write PID, register for trap-cleanup, drop our ticket,
|
|
209
|
+
# then break out into the post-acquire (Chrome sweep + return).
|
|
210
|
+
echo $$ > "$lock_dir/pid"
|
|
211
|
+
# Initial 90s lease so watchdog reads lease_remaining instead of
|
|
212
|
+
# "missing" before the first heartbeat fires. Pipelines that go
|
|
213
|
+
# through reddit_browser.py or MCP hooks will bump this on each CDP
|
|
214
|
+
# op; bash-only acquires get the 90s grace window.
|
|
215
|
+
echo $(($(date +%s) + 90)) > "$lock_dir/expires_at" 2>/dev/null || true
|
|
216
|
+
_SA_LOCK_DIRS+=("$lock_dir")
|
|
217
|
+
rm -f "$ticket_file"
|
|
218
|
+
# Remove our ticket from _SA_LOCK_TICKETS so the EXIT trap doesn't
|
|
219
|
+
# try to rm-f it again (harmless, but keeps the array honest).
|
|
220
|
+
local _new_t=()
|
|
221
|
+
local _existing
|
|
222
|
+
for _existing in ${_SA_LOCK_TICKETS[@]+"${_SA_LOCK_TICKETS[@]}"}; do
|
|
223
|
+
[ "$_existing" != "$ticket_file" ] && _new_t+=("$_existing")
|
|
224
|
+
done
|
|
225
|
+
_SA_LOCK_TICKETS=(${_new_t[@]+"${_new_t[@]}"})
|
|
226
|
+
echo "[lock] acquired $name pid=$$ at $(date +%H:%M:%S) waited=${waited}s" >&2
|
|
227
|
+
_sa_lock_event acquired "$name" "waited=${waited}s"
|
|
228
|
+
break
|
|
229
|
+
fi
|
|
230
|
+
|
|
231
|
+
# We're head-of-queue but lock_dir exists. Either the holder is alive
|
|
232
|
+
# and active (normal — wait), or they died uncleanly. Apply the same
|
|
233
|
+
# stale-detection used pre-FIFO.
|
|
234
|
+
local should_remove=false
|
|
235
|
+
if [ ! -f "$lock_dir/pid" ]; then
|
|
236
|
+
should_remove=true
|
|
237
|
+
else
|
|
238
|
+
local holder_pid
|
|
239
|
+
holder_pid=$(cat "$lock_dir/pid" 2>/dev/null || echo "")
|
|
240
|
+
if [ -z "$holder_pid" ] || ! kill -0 "$holder_pid" 2>/dev/null; then
|
|
241
|
+
should_remove=true
|
|
242
|
+
fi
|
|
243
|
+
fi
|
|
244
|
+
# Safety net: remove any lock older than 3 hours regardless. Watchdog's
|
|
245
|
+
# per-script caps (45m default, 120m for stats_reddit/github-engage) will
|
|
246
|
+
# SIGTERM a hung holder long before this fires.
|
|
247
|
+
if [ -d "$lock_dir" ]; then
|
|
248
|
+
local lock_age
|
|
249
|
+
lock_age=$(( $(date +%s) - $(stat_mtime "$lock_dir") ))
|
|
250
|
+
if [ "$lock_age" -gt 10800 ]; then
|
|
251
|
+
should_remove=true
|
|
252
|
+
fi
|
|
253
|
+
fi
|
|
254
|
+
if $should_remove; then
|
|
255
|
+
_sa_lock_event stale_reclaim "$name" "$(_sa_lock_owner_tag "$lock_dir")"
|
|
256
|
+
echo "Removing stale $name lock"
|
|
257
|
+
rm -rf "$lock_dir"
|
|
258
|
+
continue
|
|
259
|
+
fi
|
|
260
|
+
fi
|
|
261
|
+
|
|
262
|
+
if [ "$waited" -ge "$timeout" ]; then
|
|
263
|
+
echo "Previous $name run still active after $((timeout/60))min, skipping"
|
|
264
|
+
rm -f "$ticket_file"
|
|
265
|
+
exit 0
|
|
266
|
+
fi
|
|
267
|
+
# Holder identity: log who is holding the lock the first time we sleep.
|
|
268
|
+
# Read pid file, confirm liveness, then best-effort extract the .sh script
|
|
269
|
+
# name from `ps -o args=`. Without this we only knew lock waits happened
|
|
270
|
+
# (the `waited=Ns` at acquire time); we never knew which peer cycle caused
|
|
271
|
+
# them, which made cross-cycle contention impossible to attribute. We log
|
|
272
|
+
# at most once per acquire_lock call to avoid flooding the cycle log on
|
|
273
|
+
# long waits (a 60s wait would otherwise produce 30 identical lines).
|
|
274
|
+
if ! $logged_holder && [ -d "$lock_dir" ] && [ -f "$lock_dir/pid" ]; then
|
|
275
|
+
local hpid hcmd hscript
|
|
276
|
+
hpid=$(cat "$lock_dir/pid" 2>/dev/null || echo "")
|
|
277
|
+
if [ -n "$hpid" ] && kill -0 "$hpid" 2>/dev/null; then
|
|
278
|
+
hcmd=$(ps -o args= -p "$hpid" 2>/dev/null | head -c 240)
|
|
279
|
+
hscript=$(echo "$hcmd" | grep -oE '[^ /]+\.sh' | head -1)
|
|
280
|
+
[ -z "$hscript" ] && hscript='(non-shell)'
|
|
281
|
+
echo "[lock] waiting for $name pid=$$ held_by=$hpid script=$hscript cmd='${hcmd}'" >&2
|
|
282
|
+
_sa_lock_event waiting "$name" "held_by=$hpid script=$hscript"
|
|
283
|
+
logged_holder=true
|
|
284
|
+
fi
|
|
285
|
+
fi
|
|
286
|
+
# 2s poll keeps head-of-queue snappy after release without burning CPU.
|
|
287
|
+
# Pre-FIFO this was 10s, but FIFO means only the head actually contends —
|
|
288
|
+
# tighter polling here mostly affects the winner, not the racing pack.
|
|
289
|
+
sleep 2
|
|
290
|
+
waited=$((waited + 2))
|
|
291
|
+
done
|
|
292
|
+
|
|
293
|
+
# Platform-browser locks: sweep orphan Chromes holding the profile. A prior
|
|
294
|
+
# run may have exited without cleanly closing Chrome (parent playwright-mcp
|
|
295
|
+
# dies, Chrome gets reparented to PID 1, profile stays locked). Since we
|
|
296
|
+
# now hold the exclusive shell lock, any Chrome on this profile is an
|
|
297
|
+
# orphan and safe to kill before the caller launches a fresh MCP session.
|
|
298
|
+
#
|
|
299
|
+
# Also sweep orphan playwright-mcp / node wrappers reparented to PID 1. A
|
|
300
|
+
# live holder's MCP child is parented to its claude process; only true
|
|
301
|
+
# orphans (parent died without running the EXIT trap, e.g. SIGKILL/OOM)
|
|
302
|
+
# end up at ppid=1 and survive. The ppid==1 filter keeps a manually-
|
|
303
|
+
# attached Claude session pointed at the same agent config safe: its MCP
|
|
304
|
+
# child has the live claude as parent, not init. Without this sweep,
|
|
305
|
+
# orphan wrappers accumulate over days and keep launchd from re-firing
|
|
306
|
+
# because launchd treats the slot as still in flight.
|
|
307
|
+
if $is_browser_lock; then
|
|
308
|
+
local platform="${name%-browser}"
|
|
309
|
+
# Chrome sweep: only kill Chromes whose top-level Chromium has been
|
|
310
|
+
# reparented to launchd (ppid==1), i.e. true orphans whose parent
|
|
311
|
+
# playwright-mcp died without cleanup. A LIVE peer's Chromium is parented
|
|
312
|
+
# to its mcp wrapper (alive), so this filter skips it. Without the
|
|
313
|
+
# ppid==1 guard, a peer that managed to acquire the lock concurrently
|
|
314
|
+
# would SIGTERM the legitimate holder's Chrome and trigger crashes like
|
|
315
|
+
# the GPU exit_code=15 we saw on 2026-04-28 14:12 PT.
|
|
316
|
+
local chrome_pids
|
|
317
|
+
chrome_pids=$(ps -A -o pid=,ppid=,command= | awk -v plat="browser-profiles/${platform}" '$2 == "1" && index($0, "user-data-dir=") > 0 && index($0, plat) > 0 {print $1}')
|
|
318
|
+
if [ -n "$chrome_pids" ]; then
|
|
319
|
+
echo "$chrome_pids" | xargs kill -TERM 2>/dev/null || true
|
|
320
|
+
echo "Killed orphan Chrome (ppid=1) holding ${platform} profile: $(echo $chrome_pids | tr '\n' ' ')"
|
|
321
|
+
sleep 1
|
|
322
|
+
fi
|
|
323
|
+
local mcp_pids
|
|
324
|
+
mcp_pids=$(ps -A -o pid=,ppid=,command= | awk -v plat="${platform}-agent.json" '$2 == "1" && index($0, plat) > 0 {print $1}')
|
|
325
|
+
if [ -n "$mcp_pids" ]; then
|
|
326
|
+
echo "$mcp_pids" | xargs kill -TERM 2>/dev/null || true
|
|
327
|
+
echo "Killed orphan MCP wrappers (ppid=1) for ${platform}-agent: $(echo $mcp_pids | tr '\n' ' ')"
|
|
328
|
+
sleep 1
|
|
329
|
+
fi
|
|
330
|
+
fi
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
# Probe + recover a wedged platform browser. Call ONLY after acquire_lock
# "<platform>-browser" — the lock holder has exclusive access to the profile,
# so killing live MCP/Chrome here is safe (peers cannot race us). The 2026-04-25
# stats-mid-API SIGTERM and 2026-04-28 GPU exit_code=15 regressions both came
# from peers killing the holder's processes; this is the inverse and is safe
# by construction.
#
# Detection: find the top-level Chrome whose --user-data-dir matches this
# platform's profile, extract its --remote-debugging-port, GET /json/version
# with a 2s timeout. If the port is missing, Chrome isn't there, or HTTP fails,
# the browser is treated as wedged or absent.
#
# Recovery: if a top-level Chrome is on the profile but not reachable over a
# CDP port, wait up to BROWSER_WAIT_SEC seconds (default 60) for it to exit on
# its own. If it is still alive afterwards and
# defer_if_foreign_browser_mcp_active reports a foreign wrapper with a live
# Chrome, `exit 0` the calling shell without killing anything. Otherwise
# SIGTERM (then SIGKILL) every process matching <platform>-agent.json or
# user-data-dir=<profile>. Finally remove the Singleton* files and normalize
# profile.exit_type in Default/Preferences so the next launch starts clean.
#
# Arguments: $1 - platform name (profile lives in
#                 $HOME/.claude/browser-profiles/<platform>)
# Env:       BROWSER_WAIT_SEC - optional wait budget in seconds (default 60)
# Returns:   0 always; may `exit 0` the whole script in the foreign-MCP case.

# Private helper: print "yes" if a top-level Chrome (no --type= flag) is running
# with --user-data-dir on the given profile directory, otherwise "no".
# Arguments: $1 - absolute profile directory.
_sa_profile_has_chrome() {
    local profile_dir="$1"
    local state
    # awk deliberately reads ps output to EOF instead of `exit`ing on the first
    # match: an early exit closes the pipe, ps dies with SIGPIPE, and under
    # pipefail the pipeline status becomes 141. A `|| echo "no"` fallback would
    # then append a second line ("yes\nno") that matches neither "yes" nor "no".
    state=$(ps -A -o command= 2>/dev/null \
        | awk -v p="user-data-dir=$profile_dir" '
            index($0,p)>0 && index($0,"--type=")==0 && index($0,"awk ")==0 { found=1 }
            END { print (found ? "yes" : "no") }' \
        || true)
    if [ "$state" = "yes" ]; then
        echo "yes"
    else
        echo "no"
    fi
}

ensure_browser_healthy() {
    local platform="$1"
    local profile_dir="$HOME/.claude/browser-profiles/$platform"

    # 1. Find Chrome on this profile, extract its remote-debugging-port.
    # Skip renderer/gpu/utility subprocesses (those carry --type=...). They
    # inherit --user-data-dir from the parent but get --remote-debugging-port=0,
    # so without this filter we'd extract "0" from a renderer and fail the
    # localhost:0 CDP probe. Only the first match is printed; awk keeps reading
    # to EOF so ps never receives SIGPIPE (which, with pipefail + set -e, would
    # kill the calling script with exit 141). `|| true` stays as a seatbelt.
    # Observed live 2026-05-06: stats-linkedin-comments fires at 19:56 + 20:15
    # + 20:17 all died here.
    local cdp_port
    cdp_port=$(ps -A -o command= 2>/dev/null \
        | awk -v p="user-data-dir=$profile_dir" '
            !done && index($0,p)>0 && index($0,"--type=")==0 && index($0,"awk ")==0 {
                if (match($0, /remote-debugging-port=[0-9]+/)) {
                    # 22 == length("remote-debugging-port=")
                    print substr($0, RSTART+22, RLENGTH-22); done=1
                }
            }' \
        || true)

    # 2. Probe CDP. Healthy → return immediately.
    if [ -n "$cdp_port" ] \
        && curl -fsS --max-time 2 "http://localhost:${cdp_port}/json/version" >/dev/null 2>&1; then
        return 0
    fi

    # 3. CDP probe failed. Two reasons this can happen:
    #    (a) No Chrome at all on this profile — fall through to the singleton
    #        cleanup so launch_persistent_context starts fresh.
    #    (b) Chrome IS running but isn't reachable via CDP port — most likely
    #        a user-driven MCP session (linkedin-agent, twitter-agent, etc.)
    #        that uses --remote-debugging-pipe instead of a port. KILLING this
    #        Chrome destroys in-memory cookies (the disk copy can be 30-60s
    #        stale) and triggers anti-bot fingerprints, especially on LinkedIn
    #        (observed live 2026-05-06, Mediar account got authwalled).
    #
    # When Chrome is running on the profile, WAIT up to BROWSER_WAIT_SEC for it
    # to exit on its own. Only kill if it's still there after the wait. The
    # lock is already held, so peer pipelines aren't the source — it's either
    # a user MCP session (will close when they're done) or a stuck orphan
    # (will need killing).
    local has_chrome
    has_chrome=$(_sa_profile_has_chrome "$profile_dir")

    if [ "$has_chrome" = "yes" ]; then
        local browser_wait_sec="${BROWSER_WAIT_SEC:-60}"
        # A non-numeric override would make the `-lt` test error out and skip
        # the wait entirely; fall back to the default instead.
        case "$browser_wait_sec" in
            ''|*[!0-9]*) browser_wait_sec=60 ;;
        esac
        echo "[ensure_browser_healthy] ${platform}: Chrome alive on profile but no reachable CDP port. Waiting up to ${browser_wait_sec}s for it to exit (likely user MCP session or slow-finishing prior run)."
        local waited=0
        while [ "$waited" -lt "$browser_wait_sec" ]; do
            sleep 5
            waited=$((waited + 5))
            has_chrome=$(_sa_profile_has_chrome "$profile_dir")
            if [ "$has_chrome" = "no" ]; then
                echo "[ensure_browser_healthy] ${platform}: Chrome exited cleanly after ${waited}s; safe to launch fresh."
                break
            fi
        done

        # Still here after the wait? Two cases:
        #  (a) Foreign MCP wrapper alive on this profile (user's IDE / Fazm Dev /
        #      Claude Code interactive session that has <platform>-agent.json in
        #      its MCP config) — DO NOT force-kill. Killing destroys the user's
        #      Chrome session mid-use. Log + exit cleanly so the next cron cycle
        #      retries when the user is done. Observed live 2026-05-13 14:15:14:
        #      run-twitter-cycle force-killed the user's Fazm Dev twitter-agent
        #      Chrome as "wedged orphan" and trashed an active IDE session.
        #  (b) No foreign MCP wrapper alive — true wedged orphan. Force-kill.
        if [ "$has_chrome" = "yes" ]; then
            if defer_if_foreign_browser_mcp_active "$platform"; then
                echo "[ensure_browser_healthy] ${platform}: Chrome still alive after ${browser_wait_sec}s AND foreign MCP wrapper detected. NOT force-killing — exiting this run cleanly so the user's session is preserved."
                exit 0
            fi
            echo "[ensure_browser_healthy] ${platform}: Chrome still alive after ${browser_wait_sec}s — no foreign MCP wrapper found, treating as wedged orphan and force-killing."
            # TERM first, give processes a second, then KILL whatever is left.
            pkill -TERM -f "${platform}-agent.json" 2>/dev/null || true
            pkill -TERM -f "user-data-dir=${profile_dir}" 2>/dev/null || true
            sleep 1
            pkill -KILL -f "${platform}-agent.json" 2>/dev/null || true
            pkill -KILL -f "user-data-dir=${profile_dir}" 2>/dev/null || true
        fi
    fi

    # 4. Clear singletons so launch_persistent_context can start fresh.
    rm -f "$profile_dir/SingletonLock" \
          "$profile_dir/SingletonCookie" \
          "$profile_dir/SingletonSocket" 2>/dev/null || true

    # 5. Normalize a "Crashed" exit_type left behind when the previous Chrome on
    # this profile died ungracefully (the force-kill above, OOM/jetsam, force-
    # quit, system sleep). Chrome reads profile.exit_type at startup; if it's
    # "Crashed" it pops the "Something went wrong when opening your profile. Some
    # features may be unavailable" modal. That dialog is GUI-only — it never
    # reaches the launchd log — and blocks the headless pipeline until dismissed.
    # Chrome is expected not to be running here (probe failed / we waited it out /
    # force-killed). The rewrite goes through a temp file + os.replace so an
    # interrupted write can never leave a truncated Preferences behind.
    local prefs_file="$profile_dir/Default/Preferences"
    if [ -f "$prefs_file" ]; then
        python3 - "$prefs_file" "$platform" <<'PYEOF' || true
import json, os, sys, tempfile
prefs_path, platform = sys.argv[1], sys.argv[2]
try:
    # Explicit UTF-8: do not depend on the (possibly C/ASCII) launchd locale.
    with open(prefs_path, encoding="utf-8") as f:
        data = json.load(f)
    prof = data.setdefault("profile", {})
    before = prof.get("exit_type")
    changed = False
    if prof.get("exit_type") != "Normal":
        prof["exit_type"] = "Normal"; changed = True
    if prof.get("exited_cleanly") is not True:
        prof["exited_cleanly"] = True; changed = True
    if changed:
        fd, tmp_path = tempfile.mkstemp(
            dir=os.path.dirname(prefs_path) or ".", prefix=".Preferences.")
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                json.dump(data, f)
            # Keep the original file mode (mkstemp always creates 0600).
            os.chmod(tmp_path, os.stat(prefs_path).st_mode & 0o777)
            os.replace(tmp_path, prefs_path)
        except BaseException:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
        print(f"[profile_health] {platform}: normalized exit_type={before!r} -> 'Normal'")
    else:
        print(f"[profile_health] {platform}: exit_type already clean")
except Exception as e:
    print(f"[profile_health] {platform}: normalize skipped ({e})", file=sys.stderr)
PYEOF
    fi

    return 0
}
|
|
483
|
+
|
|
484
|
+
# Detect if a foreign playwright-mcp wrapper for the given platform agent has
|
|
485
|
+
# a LIVE Chrome under its process tree on the platform's profile directory.
|
|
486
|
+
# When found, the calling cron pipeline should exit cleanly without launching
|
|
487
|
+
# Chrome — racing the foreign MCP's profile crashes Chrome with "chromium
|
|
488
|
+
# profile locked by another process; waited 45s" and burns the run.
|
|
489
|
+
#
|
|
490
|
+
# Why "wrapper + Chrome" instead of "wrapper alone": playwright-mcp wrappers
|
|
491
|
+
# spawn at Claude Code session startup (regardless of whether mcp__<platform>-
|
|
492
|
+
# agent__* tools have ever been called) and stay alive for the lifetime of
|
|
493
|
+
# the IDE/CLI session — hours to days. Chrome only launches lazily on the
|
|
494
|
+
# first tool call. Treating a naked wrapper as a conflict permanently starves
|
|
495
|
+
# the cron whenever any developer keeps a Claude Code window open with the
|
|
496
|
+
# agent in their MCP config (the steady-state in this workflow). Observed
|
|
497
|
+
# live 2026-05-13 17:00 / 17:15 / 17:30 / 17:43 / 17:45: four consecutive
|
|
498
|
+
# reddit + one twitter cycle bailed with posted=0 cost=$0.00 elapsed=61s
|
|
499
|
+
# even though no Chrome was actually open on either profile.
|
|
500
|
+
#
|
|
501
|
+
# Observed live 2026-05-13 14:29 (still a real conflict): engage-twitter.sh
|
|
502
|
+
# fired on schedule while the user's Fazm Dev IDE held a twitter-agent MCP
|
|
503
|
+
# wrapper via codex-acp AND that wrapper had a live Chrome. Phase A
|
|
504
|
+
# Playwright SIGTRAPed after the 45s SingletonLock wait. THIS case is what
|
|
505
|
+
# the defer mechanism exists to prevent.
|
|
506
|
+
#
|
|
507
|
+
# Usage:
|
|
508
|
+
# defer_if_foreign_browser_mcp_active twitter || exit 0
|
|
509
|
+
# defer_if_foreign_browser_mcp_active reddit "$LOG_FILE" # optional log path
|
|
510
|
+
#
|
|
511
|
+
# Returns 0 (foreign conflict, caller should defer) or 1 (clean, caller proceeds).
|
|
512
|
+
defer_if_foreign_browser_mcp_active() {
    # Arguments: $1 - platform name (e.g. twitter, reddit)
    #            $2 - optional log file; messages are appended when its
    #                 directory is writable
    # Outputs:   diagnostic messages on stderr (and in $2 when given)
    # Returns:   0 when a foreign wrapper owns a live Chrome on the profile
    #            (caller should defer), 1 otherwise (caller proceeds).
    local platform="$1"
    local log_file="${2:-}"
    local our_pid=$$
    local cfg_pattern="${platform}-agent.json"
    local profile_dir="$HOME/.claude/browser-profiles/$platform"

    # Step 1. Find every playwright-mcp wrapper or node-playwright-mcp child
    # whose command line references this platform's agent config file. Captures
    # both the npm-exec wrapper layer and the underlying node process so we
    # don't miss either tier of the tree. The awk probe itself is excluded:
    # its own argv contains both the config name and "playwright-mcp", so it
    # would otherwise list its (already dead) PID as a wrapper.
    local wrappers
    wrappers=$(ps -A -o pid=,command= 2>/dev/null | awk -v cfg="$cfg_pattern" '
        index($0,cfg)==0 { next }
        index($0,"awk ")>0 { next }
        /npm exec @playwright\/mcp/ || /playwright-mcp/ { print $1 }
    ' || true)

    [ -z "$wrappers" ] && return 1

    # Step 2. Partition wrappers into ours (descendants of $$) vs foreign by
    # walking each wrapper's parent chain.
    local wpid cur depth foreign_wrappers=""
    local is_ours
    for wpid in $wrappers; do
        cur=$wpid
        depth=0
        is_ours=false
        # Cap at 20 hops to avoid pathological ancestry walks.
        while [ -n "$cur" ] && [ "$cur" != "1" ] && [ "$depth" -lt 20 ]; do
            if [ "$cur" = "$our_pid" ]; then
                is_ours=true
                break
            fi
            # `|| true`: a PID that vanished mid-walk must not fail the
            # assignment under pipefail; an empty result just ends the walk.
            cur=$(ps -p "$cur" -o ppid= 2>/dev/null | tr -d ' ' || true)
            depth=$((depth+1))
        done
        if ! $is_ours; then
            foreign_wrappers="$foreign_wrappers $wpid"
        fi
    done
    foreign_wrappers="${foreign_wrappers# }"

    [ -z "$foreign_wrappers" ] && return 1

    # Step 3. Require that at least one foreign wrapper has a live Chrome
    # child on this platform's profile. Walk every Chrome process whose
    # cmdline references user-data-dir=$profile_dir (this catches both the
    # top-level Chrome and its --type= renderer/utility subprocesses, all of
    # which inherit the cmdline) and check whether any of their ancestors is
    # one of the foreign wrappers. Bottom-up because pgrep -P is not portable
    # to all macOS variants and we already do ancestor walks above.
    local chrome_pids cpid
    chrome_pids=$(ps -A -o pid=,command= 2>/dev/null | awk -v p="user-data-dir=$profile_dir" '
        index($0,p)>0 && index($0,"awk ")==0 { print $1 }' || true)

    local foreign_pid=""
    if [ -n "$chrome_pids" ]; then
        for cpid in $chrome_pids; do
            cur=$cpid
            depth=0
            while [ -n "$cur" ] && [ "$cur" != "1" ] && [ "$depth" -lt 20 ]; do
                for wpid in $foreign_wrappers; do
                    if [ "$cur" = "$wpid" ]; then
                        foreign_pid=$wpid
                        break
                    fi
                done
                [ -n "$foreign_pid" ] && break
                cur=$(ps -p "$cur" -o ppid= 2>/dev/null | tr -d ' ' || true)
                depth=$((depth+1))
            done
            [ -n "$foreign_pid" ] && break
        done
    fi

    if [ -z "$foreign_pid" ]; then
        # Foreign wrapper(s) exist but none have a live Chrome on this profile.
        # No collision risk — proceed. This is the steady state when the user
        # has Claude Code open but hasn't invoked an mcp__<platform>-agent__*
        # tool this session (or invoked one and Chrome already closed).
        echo "[defer_foreign_mcp] ${platform}: foreign wrapper(s) detected (${foreign_wrappers}) but NO live Chrome on profile ${profile_dir}; proceeding." >&2
        if [ -n "$log_file" ] && [ -w "$(dirname "$log_file")" ]; then
            echo "[defer_foreign_mcp] ${platform}: foreign wrapper(s) detected (${foreign_wrappers}) but NO live Chrome on profile ${profile_dir}; proceeding." >> "$log_file"
        fi
        return 1
    fi

    # Step 4. Identify the root process owning the foreign wrapper so the log
    # is useful (tells the user which IDE / cron session is holding Chrome).
    # Same 20-hop cap as the walks above.
    local foreign_root=$foreign_pid
    cur=$foreign_pid
    depth=0
    while [ -n "$cur" ] && [ "$cur" != "1" ] && [ "$depth" -lt 20 ]; do
        foreign_root=$cur
        cur=$(ps -p "$cur" -o ppid= 2>/dev/null | tr -d ' ' || true)
        depth=$((depth+1))
    done
    local foreign_root_cmd
    foreign_root_cmd=$(ps -p "$foreign_root" -o command= 2>/dev/null | head -c 120 || true)

    local msg="[defer_foreign_mcp] ${platform}: foreign ${platform}-agent MCP wrapper PID ${foreign_pid} has a live Chrome on profile ${profile_dir} (root PID ${foreign_root}: ${foreign_root_cmd}). Skipping this run to avoid Chrome profile collision."
    echo "$msg" >&2
    if [ -n "$log_file" ] && [ -w "$(dirname "$log_file")" ]; then
        echo "$msg" >> "$log_file"
    fi
    return 0
}
|
|
617
|
+
|
|
618
|
+
# Explicit early release. Use this when a long-running script only needs the
|
|
619
|
+
# browser for part of its run (e.g. run-twitter-cycle.sh holds the lock for
|
|
620
|
+
# Phase 1 scrape, releases between Phase 1 and Phase 2b posting, then re-acquires
|
|
621
|
+
# before Phase 2b). Without this, sibling pipelines waiting on the same profile
|
|
622
|
+
# lock block for the full cycle even when the holder is not using the browser.
|
|
623
|
+
release_lock() {
    # Release the named lock early and stop tracking it in _SA_LOCK_DIRS.
    # Arguments: $1 - lock name (lock dir is /tmp/social-autoposter-<name>.lock)
    # Globals:   _SA_LOCK_DIRS (rewritten without this lock's directory)
    local name="$1"
    local lock_dir="/tmp/social-autoposter-${name}.lock"

    # Only the current owner may remove the directory. If a peer re-acquired
    # the lock after our mid-cycle release (or it is already gone), deleting it
    # would wipe a live peer's lock, so just record the skip.
    if ! _sa_we_own_lock "$lock_dir"; then
        _sa_lock_event release_skipped "$name" "$(_sa_lock_owner_tag "$lock_dir")"
        echo "[lock] release SKIPPED $name pid=$$ (not owner) at $(date +%H:%M:%S)" >&2
    else
        _sa_lock_event release "$name" "$(_sa_lock_owner_tag "$lock_dir")"
        rm -rf "$lock_dir"
        echo "[lock] released $name pid=$$ at $(date +%H:%M:%S)" >&2
    fi

    # Drop this lock from the tracked stack in either case, so the EXIT trap
    # does not try to remove it again and a later re-acquire of the same name
    # starts from an honest stack.
    local kept=()
    local entry
    for entry in ${_SA_LOCK_DIRS[@]+"${_SA_LOCK_DIRS[@]}"}; do
        if [ "$entry" = "$lock_dir" ]; then
            continue
        fi
        kept+=("$entry")
    done
    _SA_LOCK_DIRS=(${kept[@]+"${kept[@]}"})
}
|