social-autoposter 1.6.46 → 1.6.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/mcp/dist/index.js CHANGED
@@ -327,7 +327,18 @@ async function postApproved(batchId, plan) {
327
327
  // twitter_browser.py's reply handler reads this env (inherited through
328
328
  // twitter_post_plan.py's subprocess). The cron pipeline doesn't set it, so the
329
329
  // A/B disclosure experiment keeps running on autopilot/cron and on Reddit.
330
- const res = await runPython("scripts/twitter_post_plan.py", ["--plan", planPath(approvedBatch)], { timeoutMs: 900_000, env: { SAPS_SKIP_CAMPAIGN_SUFFIX: "1" } });
330
+ const res = await runPython("scripts/twitter_post_plan.py", ["--plan", planPath(approvedBatch)], {
331
+ timeoutMs: 900_000,
332
+ env: {
333
+ SAPS_SKIP_CAMPAIGN_SUFFIX: "1",
334
+ // The poster attaches to the twitter-harness Chrome over CDP. The cron
335
+ // pipeline exports this from skill/lib/twitter-backend.sh; the MCP path
336
+ // must set it explicitly or twitter_browser.py fails with
337
+ // "No twitter-harness Chrome reachable". Honor an inherited value (AppMaker /
338
+ // VM BYO-Chrome), else default to the local harness on port 9555.
339
+ TWITTER_CDP_URL: process.env.TWITTER_CDP_URL || "http://127.0.0.1:9555",
340
+ },
341
+ });
331
342
  let summary = res.stdout.trim();
332
343
  try {
333
344
  const lines = res.stdout.trim().split("\n");
@@ -1 +1 @@
1
- {"version":"1.6.44"}
1
+ {"version":"1.6.47"}
@@ -44,10 +44,20 @@ from mcp.server.fastmcp import FastMCP
44
44
  # --- Config ---
45
45
 
46
46
  PORT = int(os.environ.get("BH_PORT", "9555"))
47
- PROFILE_DIR = Path.home() / ".claude" / "browser-profiles" / "browser-harness"
48
- PID_FILE = Path.home() / ".claude" / "browser-profiles" / "browser-harness.chrome.pid"
49
- LOG_FILE = Path.home() / ".claude" / "browser-profiles" / "browser-harness.chrome.log"
50
- MCP_LOG_FILE = Path.home() / ".claude" / "browser-profiles" / "browser-harness.mcp.log"
47
+
48
+ # Profile name can be overridden via BH_PROFILE_NAME env so multiple harness
49
+ # instances (twitter-harness on 9555, linkedin-harness on 9556, reddit-harness
50
+ # on 9557) can run side by side on SEPARATE persistent profiles + PID files
51
+ # without stomping each other's cookies/sessions. If this is hardcoded to
52
+ # "browser-harness", every non-default-port instance lands on the Twitter
53
+ # profile and shares one PID_FILE, so the per-instance ensure_chrome() calls
54
+ # SIGKILL each other's Chrome (regression 2026-06-02, fixed by restoring this).
55
+ # Default "browser-harness" keeps the existing Twitter setup unchanged.
56
+ PROFILE_NAME = os.environ.get("BH_PROFILE_NAME", "browser-harness")
57
+ PROFILE_DIR = Path.home() / ".claude" / "browser-profiles" / PROFILE_NAME
58
+ PID_FILE = Path.home() / ".claude" / "browser-profiles" / f"{PROFILE_NAME}.chrome.pid"
59
+ LOG_FILE = Path.home() / ".claude" / "browser-profiles" / f"{PROFILE_NAME}.chrome.log"
60
+ MCP_LOG_FILE = Path.home() / ".claude" / "browser-profiles" / f"{PROFILE_NAME}.mcp.log"
51
61
 
52
62
 
53
63
  def _detect_chrome_bin() -> str:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "social-autoposter",
3
- "version": "1.6.46",
3
+ "version": "1.6.48",
4
4
  "description": "Automated social posting pipeline for Reddit, X/Twitter, LinkedIn, and Moltbook. Install as a Claude Code agent skill.",
5
5
  "bin": {
6
6
  "social-autoposter": "bin/cli.js"
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python3
2
+ """Scratch driver: read JSON list of {post_id, session, text[, source]} from argv[1] and
3
+ run link_edit_helper mark-edited + dm_short_links backfill-post for each.
4
+ Gitignored scratch helper for the reddit link-edit run."""
5
+ import json, subprocess, sys, os
6
+
7
+ HERE = os.path.dirname(os.path.abspath(__file__))
8
+ items = json.load(open(sys.argv[1]))
9
+ for it in items:
10
+ pid = str(it["post_id"]); sess = it["session"]; text = it["text"]
11
+ src = it.get("source", "plain_url_ab_skip")
12
+ r1 = subprocess.run([sys.executable, os.path.join(HERE, "link_edit_helper.py"),
13
+ "mark-edited", "--post-id", pid, "--content", text, "--source", src],
14
+ capture_output=True, text=True)
15
+ r2 = subprocess.run([sys.executable, os.path.join(HERE, "dm_short_links.py"),
16
+ "backfill-post", "--minted-session", sess, "--post-id", pid],
17
+ capture_output=True, text=True)
18
+ bf = (r2.stdout or "").strip().splitlines()[-1:] or [""]
19
+ print(f"post {pid}: mark_edited_rc={r1.returncode} backfill={bf[0]}"
20
+ + (f" ERR1={r1.stderr.strip()}" if r1.returncode else ""))
@@ -36,8 +36,21 @@ def main() -> int:
36
36
  if len(pages) <= 1:
37
37
  print(f"[cleanup_harness_tabs] {len(pages)} page tab(s), no cleanup needed")
38
38
  return 0
39
+ # Keep a REAL (http/https) tab when one exists, not blindly pages[0]. The
40
+ # /json order is roughly most-recently-active first, so a freshly-spawned
41
+ # about:blank can sit at index 0 and the old code would keep the blank and
42
+ # close the live x.com tab the harness daemon is attached to. Closing the
43
+ # daemon's tab forces it to re-attach and re-spawn another about:blank, which
44
+ # is exactly the orphan-tab churn this script is meant to clean up. Falling
45
+ # back to pages[0] preserves the prior behavior when every tab is blank.
46
+ def _is_real(t):
47
+ return (t.get("url") or "").startswith(("http://", "https://"))
48
+
49
+ keep = next((t for t in pages if _is_real(t)), pages[0])
39
50
  closed = 0
40
- for t in pages[1:]:
51
+ for t in pages:
52
+ if t is keep:
53
+ continue
41
54
  tid = t.get("id")
42
55
  if not tid:
43
56
  continue
@@ -46,7 +59,8 @@ def main() -> int:
46
59
  closed += 1
47
60
  except Exception:
48
61
  pass
49
- print(f"[cleanup_harness_tabs] closed {closed}/{len(pages) - 1} extra page tabs (kept 1)")
62
+ kept_kind = "1 real" if _is_real(keep) else "1"
63
+ print(f"[cleanup_harness_tabs] closed {closed}/{len(pages) - 1} extra page tabs (kept {kept_kind})")
50
64
  return 0
51
65
 
52
66
 
@@ -54,14 +54,34 @@ from datetime import datetime, timezone
54
54
  from email.mime.text import MIMEText
55
55
 
56
56
 
57
- STATE_DIR = os.path.expanduser("~/.claude/social-autoposter")
58
- STATE_FILE = os.path.join(STATE_DIR, "linkedin.killswitch")
59
- TRAIL_FILE = os.path.join(STATE_DIR, "linkedin.killswitch.trail.jsonl")
57
+ # State paths are env-overridable so the auto-recovery job can be tested
58
+ # against a throwaway killswitch file without touching the live one.
59
+ STATE_DIR = os.path.expanduser(
60
+ os.environ.get("LINKEDIN_KILLSWITCH_DIR", "~/.claude/social-autoposter")
61
+ )
62
+ STATE_FILE = os.path.expanduser(
63
+ os.environ.get("LINKEDIN_KILLSWITCH_FILE", os.path.join(STATE_DIR, "linkedin.killswitch"))
64
+ )
65
+ TRAIL_FILE = os.path.expanduser(
66
+ os.environ.get(
67
+ "LINKEDIN_KILLSWITCH_TRAIL", os.path.join(STATE_DIR, "linkedin.killswitch.trail.jsonl")
68
+ )
69
+ )
60
70
 
61
71
  GMAIL_TOKEN_PATH = os.path.expanduser("~/gmail-api/token_i_at_m13v.com.json")
62
72
  GMAIL_SCOPES = ["https://mail.google.com/"]
63
73
  NOTIFICATION_EMAIL = os.environ.get("NOTIFICATION_EMAIL", "i@m13v.com")
64
74
 
75
+ # Auto-recovery (2026-06-03): after the killswitch has been active this long,
76
+ # an hourly launchd job (skill/linkedin-recovery.sh) runs a gentle read-only
77
+ # probe of LinkedIn. If the session is healthy again, it clears the flag, which
78
+ # resumes every LinkedIn pipeline on its next fire (they all gate on this file).
79
+ # The wait protects the account: per the anti-bot rule we let the session sit
80
+ # idle ~24h after a 999/authwall before re-touching it, rather than hammering
81
+ # the login wall on every cron tick. Override for testing.
82
+ RECOVERY_MIN_AGE_HOURS = float(os.environ.get("LINKEDIN_RECOVERY_MIN_AGE_HOURS", "24"))
83
+ LINKEDIN_CDP_URL = os.environ.get("LINKEDIN_CDP_URL", "http://127.0.0.1:9556")
84
+
65
85
  VALID_SIGNALS = {
66
86
  "http_999",
67
87
  "authwall_redirect",
@@ -97,6 +117,25 @@ def read():
97
117
  return {"signal": "unknown", "detail": "state file unreadable"}
98
118
 
99
119
 
120
+ def _parse_ts(ts):
121
+ """Parse an ISO Z timestamp like 2026-06-03T07:23:10Z. None on failure."""
122
+ try:
123
+ return datetime.strptime(ts, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
124
+ except Exception:
125
+ return None
126
+
127
+
128
+ def age_seconds():
129
+ """Seconds since the killswitch engaged, or None if inactive/unparseable."""
130
+ p = read()
131
+ if not p:
132
+ return None
133
+ dt = _parse_ts(p.get("ts", ""))
134
+ if dt is None:
135
+ return None
136
+ return (datetime.now(timezone.utc) - dt).total_seconds()
137
+
138
+
100
139
  def _append_trail(payload):
101
140
  _ensure_dir()
102
141
  try:
@@ -219,6 +258,135 @@ def engage(signal, detail="", run_log_path="", extra=None, send_email=True):
219
258
  return on_disk
220
259
 
221
260
 
261
+ _LOGIN_MARKERS = ("/login", "/checkpoint", "/uas/login", "linkedin.com/authwall")
262
+
263
+
264
+ def _probe_linkedin_health(cdp_url):
265
+ """Gentle, read-only health probe of the LinkedIn session.
266
+
267
+ Attaches (CDP) to the already-running linkedin-harness Chrome and does the
268
+ minimal nav set the anti-bot carve-out allows: ONE nav to /feed/ (confirms
269
+ we are logged in) and ONE nav to the exact /in/me/recent-activity/comments/
270
+ endpoint that trips the killswitch (confirms it no longer bounces to the
271
+ authwall). No Voyager calls, no scroll loops, no permalink fan-out, no
272
+ clicks/typing, no programmatic login. Reuses an existing tab (opening a
273
+ temporary one only when the context has no pages) and never closes the shared context.
274
+
275
+ Returns (healthy: bool, detail: str). Never raises.
276
+ """
277
+ try:
278
+ from playwright.sync_api import sync_playwright
279
+ except Exception as e:
280
+ return False, "playwright import failed: {}".format(e)
281
+
282
+ try:
283
+ with sync_playwright() as p:
284
+ try:
285
+ browser = p.chromium.connect_over_cdp(cdp_url, timeout=8000)
286
+ except Exception as e:
287
+ return False, "cdp attach failed ({}): {}".format(cdp_url, e)
288
+ contexts = browser.contexts
289
+ if not contexts:
290
+ return False, "cdp attach: zero contexts"
291
+ ctx = contexts[0]
292
+
293
+ page = None
294
+ reused = False
295
+ for pg in ctx.pages:
296
+ u = pg.url or ""
297
+ if "linkedin.com" in u and "login" not in u and "checkpoint" not in u:
298
+ page, reused = pg, True
299
+ break
300
+ if page is None and ctx.pages:
301
+ page, reused = ctx.pages[0], True
302
+ if page is None:
303
+ page = ctx.new_page()
304
+
305
+ try:
306
+ # Nav 1: /feed/ — are we still logged in?
307
+ page.goto(
308
+ "https://www.linkedin.com/feed/",
309
+ wait_until="domcontentloaded",
310
+ timeout=30000,
311
+ )
312
+ page.wait_for_timeout(2000)
313
+ u1 = page.url or ""
314
+ if any(m in u1 for m in _LOGIN_MARKERS):
315
+ return False, "feed redirected to auth: {}".format(u1)
316
+
317
+ # Nav 2: the exact endpoint that engaged the killswitch.
318
+ page.goto(
319
+ "https://www.linkedin.com/in/me/recent-activity/comments/",
320
+ wait_until="domcontentloaded",
321
+ timeout=30000,
322
+ )
323
+ page.wait_for_timeout(2000)
324
+ u2 = page.url or ""
325
+ if any(m in u2 for m in _LOGIN_MARKERS):
326
+ return False, "activity endpoint redirected to auth: {}".format(u2)
327
+
328
+ title = ""
329
+ try:
330
+ title = page.title() or ""
331
+ except Exception:
332
+ pass
333
+ return True, "feed+activity render (title={!r}, url={})".format(title, u2)
334
+ finally:
335
+ if page is not None and not reused:
336
+ try:
337
+ page.close()
338
+ except Exception:
339
+ pass
340
+ except Exception as e:
341
+ return False, "probe exception: {}: {}".format(type(e).__name__, e)
342
+
343
+
344
+ def _send_recovery_email(detail, age_sec):
345
+ """Notify that the killswitch auto-cleared after a healthy probe."""
346
+ try:
347
+ from google.auth.transport.requests import Request
348
+ from google.oauth2.credentials import Credentials
349
+ from googleapiclient.discovery import build
350
+
351
+ if not os.path.isfile(GMAIL_TOKEN_PATH):
352
+ return False, "gmail token missing"
353
+
354
+ creds = Credentials.from_authorized_user_file(GMAIL_TOKEN_PATH, GMAIL_SCOPES)
355
+ if creds.expired and creds.refresh_token:
356
+ creds.refresh(Request())
357
+ with open(GMAIL_TOKEN_PATH, "w") as f:
358
+ f.write(creds.to_json())
359
+
360
+ service = build("gmail", "v1", credentials=creds, cache_discovery=False)
361
+ age_h = round(age_sec / 3600.0, 1) if age_sec else "?"
362
+ subject = "[LI KILL] RECOVERED auto-probe healthy"
363
+ body_lines = [
364
+ "LinkedIn killswitch auto-cleared.",
365
+ "",
366
+ "The hourly recovery probe found the session healthy after the",
367
+ "killswitch had been active for " + str(age_h) + "h, so it cleared",
368
+ "the flag. Every LinkedIn pipeline resumes on its next launchd fire.",
369
+ "",
370
+ "Probe detail: " + str(detail),
371
+ "",
372
+ "If LinkedIn was NOT actually healthy, re-engage manually:",
373
+ " python3 ~/social-autoposter/scripts/linkedin_killswitch.py \\",
374
+ " engage --signal manual --detail 'auto-recovery false positive'",
375
+ "",
376
+ "State file: " + STATE_FILE,
377
+ "Trail file: " + TRAIL_FILE,
378
+ ]
379
+ body = _scrub_dashes("\n".join(body_lines))
380
+ msg = MIMEText(body, "plain", "utf-8")
381
+ msg["to"] = NOTIFICATION_EMAIL
382
+ msg["subject"] = _scrub_dashes(subject)
383
+ raw = base64.urlsafe_b64encode(msg.as_bytes()).decode("utf-8")
384
+ service.users().messages().send(userId="me", body={"raw": raw}).execute()
385
+ return True, "sent"
386
+ except Exception as exc:
387
+ return False, "send failed: " + str(exc)
388
+
389
+
222
390
  def clear():
223
391
  """Human ack: remove the flag. Trail row records who cleared it."""
224
392
  if not is_active():
@@ -276,6 +444,78 @@ def _cmd_clear(args):
276
444
  sys.exit(0)
277
445
 
278
446
 
447
+ def _cmd_recover_check(args):
448
+ """Gate for the hourly recovery job: exit 0 only if the killswitch is
449
+ active AND has been so for >= RECOVERY_MIN_AGE_HOURS. Lets the shell
450
+ wrapper decide whether to even bring up Chrome (no-op most hours)."""
451
+ if not is_active():
452
+ print("recover-check: killswitch not active, nothing to recover", file=sys.stderr)
453
+ sys.exit(1)
454
+ age = age_seconds()
455
+ min_age = RECOVERY_MIN_AGE_HOURS * 3600
456
+ if age is None:
457
+ print(
458
+ "recover-check: active but ts unparseable, manual clear required",
459
+ file=sys.stderr,
460
+ )
461
+ sys.exit(1)
462
+ if age < min_age:
463
+ print(
464
+ "recover-check: active but only {:.1f}h old (< {}h), waiting".format(
465
+ age / 3600.0, RECOVERY_MIN_AGE_HOURS
466
+ ),
467
+ file=sys.stderr,
468
+ )
469
+ sys.exit(1)
470
+ print(
471
+ "recover-check: eligible (active {:.1f}h >= {}h)".format(
472
+ age / 3600.0, RECOVERY_MIN_AGE_HOURS
473
+ ),
474
+ file=sys.stderr,
475
+ )
476
+ sys.exit(0)
477
+
478
+
479
+ def _cmd_recover(args):
480
+ """Run the gentle probe (Chrome must already be up); clear + email on health.
481
+
482
+ Re-checks the age gate itself (unless --force) so it is safe to call
483
+ directly, not just behind recover-check."""
484
+ if not is_active():
485
+ print(json.dumps({"recovered": False, "reason": "not_active"}))
486
+ sys.exit(0)
487
+ age = age_seconds()
488
+ min_age = RECOVERY_MIN_AGE_HOURS * 3600
489
+ if not args.force and (age is None or age < min_age):
490
+ print(json.dumps({
491
+ "recovered": False,
492
+ "reason": "too_young",
493
+ "age_hours": (round(age / 3600.0, 2) if age else None),
494
+ }))
495
+ sys.exit(0)
496
+
497
+ cdp_url = args.cdp_url or LINKEDIN_CDP_URL
498
+ healthy, detail = _probe_linkedin_health(cdp_url)
499
+ _append_trail({
500
+ "event": "recover_probe",
501
+ "ts": _now_iso(),
502
+ "healthy": healthy,
503
+ "detail": detail,
504
+ "age_hours": (round(age / 3600.0, 2) if age else None),
505
+ })
506
+ if not healthy:
507
+ print(json.dumps({"recovered": False, "reason": "probe_unhealthy", "detail": detail}))
508
+ sys.exit(0)
509
+
510
+ clear()
511
+ _append_trail({"event": "recover_clear", "ts": _now_iso(), "detail": detail})
512
+ if not args.no_email:
513
+ ok, msg = _send_recovery_email(detail, age)
514
+ _append_trail({"event": "recover_email", "ok": ok, "msg": msg})
515
+ print(json.dumps({"recovered": True, "detail": detail}))
516
+ sys.exit(0)
517
+
518
+
279
519
  def main():
280
520
  parser = argparse.ArgumentParser(description="LinkedIn pipeline killswitch")
281
521
  sub = parser.add_subparsers(dest="cmd", required=True)
@@ -292,12 +532,27 @@ def main():
292
532
 
293
533
  sub.add_parser("clear", help="clear the killswitch (human ack)")
294
534
 
535
+ sub.add_parser(
536
+ "recover-check",
537
+ help="exit 0 if active AND >= RECOVERY_MIN_AGE_HOURS old (else 1)",
538
+ )
539
+
540
+ r = sub.add_parser(
541
+ "recover",
542
+ help="gentle probe; clear + email on health (Chrome must be up)",
543
+ )
544
+ r.add_argument("--cdp-url", default="", help="harness CDP URL (default $LINKEDIN_CDP_URL)")
545
+ r.add_argument("--no-email", action="store_true", help="skip recovery email")
546
+ r.add_argument("--force", action="store_true", help="skip the age gate")
547
+
295
548
  args = parser.parse_args()
296
549
  {
297
550
  "check": _cmd_check,
298
551
  "status": _cmd_status,
299
552
  "engage": _cmd_engage,
300
553
  "clear": _cmd_clear,
554
+ "recover-check": _cmd_recover_check,
555
+ "recover": _cmd_recover,
301
556
  }[args.cmd](args)
302
557
 
303
558
 
@@ -243,14 +243,35 @@ def main():
243
243
  continue
244
244
  bank = build_bank(name, args.min_likes, args.min_clicks, args.limit)
245
245
  proven_size = len(bank)
246
+ invent_added = 0
246
247
  if not args.no_invented:
247
248
  invented = fetch_invented_queries(name, args.invent_min_supply)
248
249
  bank = merge_invented(bank, invented)
250
+ invent_added = len(bank) - proven_size
251
+ # Cold-start bootstrap: a freshly-configured project has no proven
252
+ # queries (no post history) AND no invented ones (invent_topics.py
253
+ # hasn't run for it yet), so the bank is empty -> the cycle scans
254
+ # nothing and returns 0 drafts on every early cycle (the dead-on-
255
+ # arrival problem). Fall back to the project's seeded search_topic AS
256
+ # the query so there's something to scrape on day one. Proven +
257
+ # invented queries supersede this automatically as they accumulate.
258
+ # (cold-start fallback, 2026-06-03)
259
+ cold_start = False
260
+ if not bank:
261
+ topic = ((p.get("search_topic") if isinstance(p, dict) else "") or "").strip()
262
+ if topic:
263
+ bank = [{
264
+ "project": name,
265
+ "query": f"{topic} -filter:replies",
266
+ "search_topic": topic,
267
+ "likes": 0, "clicks": 0, "posts": 0,
268
+ }]
269
+ cold_start = True
249
270
  combined.extend(bank)
250
- invent_added = len(bank) - proven_size
251
271
  print(f"qualified_query_bank: project={name!r} -> {proven_size} proven "
252
- f"+ {invent_added} invented = {len(bank)} queries",
253
- file=sys.stderr)
272
+ f"+ {invent_added} invented"
273
+ + (" + 1 cold-start(topic)" if cold_start else "")
274
+ + f" = {len(bank)} queries", file=sys.stderr)
254
275
  json.dump(combined, sys.stdout)
255
276
  print()
256
277
  print(f"qualified_query_bank: combined bank = {len(combined)} queries across "
@@ -276,6 +276,22 @@ ensure_linkedin_browser_for_backend() {
276
276
  _extra+=(--window-size="${BH_LINKEDIN_WINDOW_SIZE:-1024,1013}")
277
277
  ;;
278
278
  esac
279
+ # Self-heal (2026-06-03): reap any stale Chrome holding THIS profile dir
280
+ # but not answering CDP on our port, else the relaunch hands off via the
281
+ # SingletonLock and loops "failed to start within 12s". Exact-dir match
282
+ # (trailing space) so this never touches the twitter browser-harness
283
+ # profile. See twitter-backend.sh for the regression that motivated this.
284
+ local _prof_dir="$HOME/.claude/browser-profiles/browser-harness-linkedin"
285
+ local _stale_pids
286
+ _stale_pids=$(pgrep -f -- "--user-data-dir=$_prof_dir " 2>/dev/null || true)
287
+ if [ -n "$_stale_pids" ] && ! curl -sf --max-time 2 -o /dev/null http://127.0.0.1:9556/json/version 2>/dev/null; then
288
+ echo "[$(date +%H:%M:%S)] CDP down but Chrome still holds $_prof_dir (pids: $(echo $_stale_pids | tr '\n' ' ')); reaping stale profile owner before relaunch" >&2
289
+ kill $_stale_pids 2>/dev/null || true
290
+ sleep 2
291
+ _stale_pids=$(pgrep -f -- "--user-data-dir=$_prof_dir " 2>/dev/null || true)
292
+ [ -n "$_stale_pids" ] && { kill -9 $_stale_pids 2>/dev/null || true; sleep 1; }
293
+ rm -f "$_prof_dir/SingletonLock" "$_prof_dir/SingletonSocket" "$_prof_dir/SingletonCookie" 2>/dev/null || true
294
+ fi
279
295
  "$_chrome_bin" \
280
296
  --remote-debugging-port=9556 \
281
297
  --user-data-dir="$HOME/.claude/browser-profiles/browser-harness-linkedin" \
@@ -187,6 +187,22 @@ ensure_reddit_browser_for_backend() {
187
187
  _extra+=(--window-size="${BH_REDDIT_WINDOW_SIZE:-911,1016}")
188
188
  ;;
189
189
  esac
190
+ # Self-heal (2026-06-03): reap any stale Chrome holding THIS profile dir
191
+ # but not answering CDP on our port, else the relaunch hands off via the
192
+ # SingletonLock and loops "failed to start within 12s". Exact-dir match
193
+ # (trailing space) keeps this scoped to reddit-harness only. See
194
+ # twitter-backend.sh for the regression that motivated this.
195
+ local _prof_dir="$HOME/.claude/browser-profiles/reddit-harness"
196
+ local _stale_pids
197
+ _stale_pids=$(pgrep -f -- "--user-data-dir=$_prof_dir " 2>/dev/null || true)
198
+ if [ -n "$_stale_pids" ] && ! curl -sf --max-time 2 -o /dev/null http://127.0.0.1:9557/json/version 2>/dev/null; then
199
+ echo "[$(date +%H:%M:%S)] CDP down but Chrome still holds $_prof_dir (pids: $(echo $_stale_pids | tr '\n' ' ')); reaping stale profile owner before relaunch" >&2
200
+ kill $_stale_pids 2>/dev/null || true
201
+ sleep 2
202
+ _stale_pids=$(pgrep -f -- "--user-data-dir=$_prof_dir " 2>/dev/null || true)
203
+ [ -n "$_stale_pids" ] && { kill -9 $_stale_pids 2>/dev/null || true; sleep 1; }
204
+ rm -f "$_prof_dir/SingletonLock" "$_prof_dir/SingletonSocket" "$_prof_dir/SingletonCookie" 2>/dev/null || true
205
+ fi
190
206
  "$_chrome_bin" \
191
207
  --remote-debugging-port=9557 \
192
208
  --user-data-dir="$HOME/.claude/browser-profiles/reddit-harness" \
@@ -205,6 +205,26 @@ ensure_twitter_browser_for_backend() {
205
205
  # re-injection needed. Matches the flags the Playwright browser agents
206
206
  # already use. (Root-cause persistence fix, 2026-06-02; the cookie
207
207
  # mirror + restore_twitter_session.py remain as the safety net.)
208
+ # Self-heal (2026-06-03): if a Chrome already holds THIS profile dir but
209
+ # is not answering CDP on our port, a fresh launch hands off to it via
210
+ # Chrome's SingletonLock and exits without ever binding our port — the
211
+ # old "failed to start within 12s" loop (8h Twitter outage overnight
212
+ # 2026-06-02/03, root cause: a server.py regression that dropped
213
+ # BH_PROFILE_NAME and collapsed the linkedin/twitter harness profiles
214
+ # onto this one, stranding an orphan on 9556). Reap the stale owner of
215
+ # our EXACT profile dir (trailing space in the pattern so browser-harness
216
+ # never matches browser-harness-linkedin) before relaunching.
217
+ local _prof_dir="$HOME/.claude/browser-profiles/browser-harness"
218
+ local _stale_pids
219
+ _stale_pids=$(pgrep -f -- "--user-data-dir=$_prof_dir " 2>/dev/null || true)
220
+ if [ -n "$_stale_pids" ] && ! curl -sf --max-time 2 -o /dev/null http://127.0.0.1:9555/json/version 2>/dev/null; then
221
+ echo "[$(date +%H:%M:%S)] CDP down but Chrome still holds $_prof_dir (pids: $(echo $_stale_pids | tr '\n' ' ')); reaping stale profile owner before relaunch" >&2
222
+ kill $_stale_pids 2>/dev/null || true
223
+ sleep 2
224
+ _stale_pids=$(pgrep -f -- "--user-data-dir=$_prof_dir " 2>/dev/null || true)
225
+ [ -n "$_stale_pids" ] && { kill -9 $_stale_pids 2>/dev/null || true; sleep 1; }
226
+ rm -f "$_prof_dir/SingletonLock" "$_prof_dir/SingletonSocket" "$_prof_dir/SingletonCookie" 2>/dev/null || true
227
+ fi
208
228
  "$_chrome_bin" \
209
229
  --remote-debugging-port=9555 \
210
230
  --user-data-dir="$HOME/.claude/browser-profiles/browser-harness" \
@@ -0,0 +1,71 @@
1
+ #!/bin/bash
2
+ # linkedin-recovery.sh — hourly auto-recovery for the LinkedIn killswitch.
3
+ #
4
+ # Problem this solves: when LinkedIn returns an HTTP 999 / authwall, the
5
+ # killswitch (scripts/linkedin_killswitch.py) engages and every LinkedIn
6
+ # pipeline self-aborts at startup until a human re-auths and clears the flag.
7
+ # Most of the time the 999 is transient (a momentary rate-limit), the session
8
+ # cookies stay valid, and the only thing keeping LinkedIn down is the flag
9
+ # itself, which never auto-clears. That stranded the pipeline overnight on
10
+ # 2026-06-03.
11
+ #
12
+ # This job, fired hourly by launchd (com.m13v.social-linkedin-recovery), does:
13
+ # 1. recover-check — proceed ONLY if the killswitch is active AND has been so
14
+ # for >= LINKEDIN_RECOVERY_MIN_AGE_HOURS (default 24h). The 24h wait is the
15
+ # anti-bot rule: let the session sit idle after a 999 rather than hammering
16
+ # the login wall every tick. Not eligible -> exit immediately (no Chrome).
17
+ # 2. Bring up the linkedin-harness Chrome (port 9556) via
18
+ # ensure_linkedin_browser_for_backend (also takes the pipeline lock).
19
+ # 3. recover — a gentle read-only probe (ONE nav to /feed/, ONE nav to the
20
+ # recent-activity/comments endpoint that tripped it). If healthy, it clears
21
+ # the killswitch and emails [LI KILL] RECOVERED.
22
+ #
23
+ # When the flag clears, the six LinkedIn launchd jobs resume on their next fire
24
+ # (they all gate on the killswitch file). There is NO launchctl load/unload:
25
+ # the jobs were never unloaded, only gated, so clearing the flag is the resume.
26
+ #
27
+ # This script is a no-op (instant exit, no Chrome) on every hour the killswitch
28
+ # is inactive or younger than the threshold, so it is safe to leave loaded.
29
+
30
+ set -uo pipefail
31
+
32
+ REPO_DIR="$HOME/social-autoposter"
33
+ LOG_DIR="$REPO_DIR/skill/logs"
34
+ mkdir -p "$LOG_DIR"
35
+ LOG="$LOG_DIR/linkedin-recovery.log"
36
+
37
+ log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG" >&2; }
38
+
39
+ PY="/opt/homebrew/bin/python3"
40
+ [ -x "$PY" ] || PY="/usr/bin/python3"
41
+
42
+ # Gate: only proceed if the killswitch is active AND >= threshold old.
43
+ # No Chrome launch otherwise — this is the common (no-op) path.
44
+ if ! "$PY" "$REPO_DIR/scripts/linkedin_killswitch.py" recover-check >>"$LOG" 2>&1; then
45
+ exit 0
46
+ fi
47
+
48
+ log "killswitch eligible for auto-recovery; bringing up harness Chrome for gentle probe"
49
+
50
+ # linkedin-backend.sh exports LINKEDIN_CDP_URL + LINKEDIN_DISCOVER_PYTHON and
51
+ # provides ensure_linkedin_browser_for_backend (launches port-9556 Chrome and
52
+ # acquires the cross-pipeline lock). Identify ourselves as the lock holder.
53
+ export SAPS_PIPELINE_NAME="linkedin-recovery"
54
+ # shellcheck disable=SC1091
55
+ source "$REPO_DIR/skill/lib/linkedin-backend.sh"
56
+
57
+ if ! ensure_linkedin_browser_for_backend; then
58
+ log "ERROR: could not bring up linkedin-harness Chrome; will retry next hour"
59
+ exit 0
60
+ fi
61
+
62
+ # The probe needs a Playwright-capable interpreter (Python 3.14 lacks it; the backend
63
+ # resolves a working one into LINKEDIN_DISCOVER_PYTHON).
64
+ PROBE_PY="${LINKEDIN_DISCOVER_PYTHON:-$PY}"
65
+ RESULT="$("$PROBE_PY" "$REPO_DIR/scripts/linkedin_killswitch.py" recover \
66
+ --cdp-url "$LINKEDIN_CDP_URL" 2>>"$LOG")"
67
+ log "recover result: $RESULT"
68
+
69
+ # On recovery the flag is now gone; the six LinkedIn jobs resume on their next
70
+ # launchd fire. Nothing to load/unload here.
71
+ exit 0