@m13v/s4l 1.6.197-rc.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (326) hide show
  1. package/README.md +143 -0
  2. package/SKILL.md +342 -0
  3. package/bin/cli.js +980 -0
  4. package/bin/cookie-helper.js +315 -0
  5. package/bin/platform.js +59 -0
  6. package/bin/scheduler/index.js +12 -0
  7. package/bin/scheduler/launchd.js +518 -0
  8. package/browser-agent-configs/all-agents-mcp.json +68 -0
  9. package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
  10. package/browser-agent-configs/linkedin-agent.json +17 -0
  11. package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
  12. package/browser-agent-configs/reddit-agent-mcp.json +16 -0
  13. package/browser-agent-configs/reddit-agent.json +17 -0
  14. package/browser-agent-configs/twitter-harness-mcp.json +18 -0
  15. package/config.example.json +45 -0
  16. package/mcp/dist/index.js +4212 -0
  17. package/mcp/dist/onboarding.js +200 -0
  18. package/mcp/dist/panel.html +176 -0
  19. package/mcp/dist/product-link.html +102 -0
  20. package/mcp/dist/repo.js +222 -0
  21. package/mcp/dist/runtime.js +1079 -0
  22. package/mcp/dist/screencast.js +323 -0
  23. package/mcp/dist/setup.js +545 -0
  24. package/mcp/dist/telemetry.js +306 -0
  25. package/mcp/dist/twitterAuth.js +138 -0
  26. package/mcp/dist/version.js +271 -0
  27. package/mcp/dist/version.json +4 -0
  28. package/mcp/install-runtime.mjs +70 -0
  29. package/mcp/install.mjs +169 -0
  30. package/mcp/manifest.json +80 -0
  31. package/mcp/menubar/dashboard_server.py +213 -0
  32. package/mcp/menubar/s4l_card.py +1314 -0
  33. package/mcp/menubar/s4l_log_relay.py +179 -0
  34. package/mcp/menubar/s4l_menubar.py +2439 -0
  35. package/mcp/menubar/s4l_state.py +891 -0
  36. package/mcp/package.json +34 -0
  37. package/mcp/shared/doctor.cjs +437 -0
  38. package/mcp/shared/onboarding-ledger.cjs +324 -0
  39. package/mcp-servers/browser-harness/server.py +968 -0
  40. package/package.json +160 -0
  41. package/requirements.txt +20 -0
  42. package/scripts/_compute_allowlist.py +58 -0
  43. package/scripts/_db_update.py +20 -0
  44. package/scripts/_filt.py +9 -0
  45. package/scripts/_li_notif_match.py +76 -0
  46. package/scripts/_li_notif_orchestrate.py +126 -0
  47. package/scripts/_lock_preempt_test.py +60 -0
  48. package/scripts/_run_icp_precheck.py +57 -0
  49. package/scripts/a16z_pearx_calendar_reminders.py +99 -0
  50. package/scripts/account_resolver.py +141 -0
  51. package/scripts/active_campaigns.py +114 -0
  52. package/scripts/active_users.py +190 -0
  53. package/scripts/amplitude_24h_signups.py +468 -0
  54. package/scripts/amplitude_signups.py +177 -0
  55. package/scripts/apply_onboarding_selections.py +131 -0
  56. package/scripts/audience_pages.py +243 -0
  57. package/scripts/audit_helper.py +120 -0
  58. package/scripts/author_history_block.py +353 -0
  59. package/scripts/autopilot_stall_watch.py +284 -0
  60. package/scripts/backfill_twitter_attempts_topic.py +81 -0
  61. package/scripts/backfill_twitter_log_post_no_id.py +322 -0
  62. package/scripts/bench_dashboard.sh +138 -0
  63. package/scripts/bh_send.py +39 -0
  64. package/scripts/build_persona.py +409 -0
  65. package/scripts/bulk_icp.py +18 -0
  66. package/scripts/campaign_bump.py +51 -0
  67. package/scripts/capture_thread_media.py +288 -0
  68. package/scripts/check_browser_lock_health.sh +81 -0
  69. package/scripts/check_external_pool_depth.py +253 -0
  70. package/scripts/check_unread_web_chats.py +28 -0
  71. package/scripts/claim_web_chat.py +47 -0
  72. package/scripts/classify_run_error.py +158 -0
  73. package/scripts/claude_job.py +988 -0
  74. package/scripts/clean_stale_singleton.sh +56 -0
  75. package/scripts/cleanup_harness_tabs.py +68 -0
  76. package/scripts/copy_browser_cookies.py +454 -0
  77. package/scripts/counterparty_history.py +350 -0
  78. package/scripts/db.py +57 -0
  79. package/scripts/discover_claude_profiles.py +120 -0
  80. package/scripts/discover_linkedin_candidates.py +984 -0
  81. package/scripts/dm_conversation.py +682 -0
  82. package/scripts/dm_db_update.py +69 -0
  83. package/scripts/dm_engage_helper.py +161 -0
  84. package/scripts/dm_outreach_helper.py +147 -0
  85. package/scripts/dm_outreach_twitter_helper.py +129 -0
  86. package/scripts/dm_send_log.py +106 -0
  87. package/scripts/dm_short_links.py +1084 -0
  88. package/scripts/dump_web_chat_history.py +47 -0
  89. package/scripts/engage_github.py +640 -0
  90. package/scripts/engage_reddit.py +1235 -0
  91. package/scripts/engage_twitter_helper.py +301 -0
  92. package/scripts/engagement_styles.py +1787 -0
  93. package/scripts/enrich_twitter_candidates.py +82 -0
  94. package/scripts/feedback_digest.py +448 -0
  95. package/scripts/fetch_prospect_profile.py +312 -0
  96. package/scripts/fetch_twitter_t1.py +134 -0
  97. package/scripts/find_threads.py +530 -0
  98. package/scripts/follow_gate_log.py +59 -0
  99. package/scripts/funnel_per_day.py +194 -0
  100. package/scripts/generate_daily_human_style.py +494 -0
  101. package/scripts/generation_trace.py +173 -0
  102. package/scripts/get_run_cost.py +107 -0
  103. package/scripts/github_engage_helper.py +93 -0
  104. package/scripts/github_tools.py +509 -0
  105. package/scripts/harness_overlay.py +556 -0
  106. package/scripts/harvest_twitter_following.py +243 -0
  107. package/scripts/heartbeat.sh +70 -0
  108. package/scripts/history_context.py +284 -0
  109. package/scripts/http_api.py +206 -0
  110. package/scripts/human_dm_replies_helper.py +169 -0
  111. package/scripts/identity.py +302 -0
  112. package/scripts/ig_batch_creator.sh +93 -0
  113. package/scripts/ig_post_type_picker.py +243 -0
  114. package/scripts/ig_scrape_transcribe.sh +91 -0
  115. package/scripts/ingest_human_dm_replies.py +271 -0
  116. package/scripts/ingest_web_chat_replies.py +229 -0
  117. package/scripts/install_fleet.py +187 -0
  118. package/scripts/invent_mcp_server.py +350 -0
  119. package/scripts/invent_topics.py +1462 -0
  120. package/scripts/learned_preferences.py +263 -0
  121. package/scripts/li_discovery.py +161 -0
  122. package/scripts/link_edit_helper.py +142 -0
  123. package/scripts/link_tail.py +592 -0
  124. package/scripts/linkedin_api.py +561 -0
  125. package/scripts/linkedin_browser.py +730 -0
  126. package/scripts/linkedin_cooldown.py +128 -0
  127. package/scripts/linkedin_exclusions.py +234 -0
  128. package/scripts/linkedin_killswitch.py +1333 -0
  129. package/scripts/linkedin_search_topic_schema.py +49 -0
  130. package/scripts/linkedin_unipile.py +658 -0
  131. package/scripts/linkedin_url.py +228 -0
  132. package/scripts/log_claude_session.py +636 -0
  133. package/scripts/log_draft.py +143 -0
  134. package/scripts/log_linkedin_search_attempts.py +126 -0
  135. package/scripts/log_post.py +651 -0
  136. package/scripts/log_run.py +364 -0
  137. package/scripts/log_thread_media.py +108 -0
  138. package/scripts/log_twitter_search_attempts.py +150 -0
  139. package/scripts/log_twitter_skips.py +211 -0
  140. package/scripts/lookup_post.py +78 -0
  141. package/scripts/mark_web_chat_processed.py +32 -0
  142. package/scripts/mcp_lock_proxy.py +370 -0
  143. package/scripts/memory_snapshot.py +972 -0
  144. package/scripts/merge_review_queue.py +215 -0
  145. package/scripts/mint_external_pool.py +182 -0
  146. package/scripts/mint_kent_pool.py +249 -0
  147. package/scripts/moltbook_post.py +320 -0
  148. package/scripts/moltbook_tools.py +159 -0
  149. package/scripts/pending_threads.py +188 -0
  150. package/scripts/pick_ig_account.py +177 -0
  151. package/scripts/pick_project.py +208 -0
  152. package/scripts/pick_search_topic.py +771 -0
  153. package/scripts/pick_thread_target.py +279 -0
  154. package/scripts/pick_twitter_thread_target.py +202 -0
  155. package/scripts/podlog_fetch_batch.sh +32 -0
  156. package/scripts/post_github.py +1311 -0
  157. package/scripts/post_reddit.py +2668 -0
  158. package/scripts/precompute_dashboard_stats.py +204 -0
  159. package/scripts/preflight.sh +297 -0
  160. package/scripts/progress.py +88 -0
  161. package/scripts/project_excludes.py +353 -0
  162. package/scripts/project_slugs.py +91 -0
  163. package/scripts/project_stats.py +241 -0
  164. package/scripts/project_stats_json.py +1563 -0
  165. package/scripts/project_topics.py +192 -0
  166. package/scripts/qualified_query_bank.py +436 -0
  167. package/scripts/reap_stale_claude_sessions.py +867 -0
  168. package/scripts/reddit_browser.py +2549 -0
  169. package/scripts/reddit_browser_fetch.py +141 -0
  170. package/scripts/reddit_browser_lock.py +593 -0
  171. package/scripts/reddit_chat_sync.py +710 -0
  172. package/scripts/reddit_query_bank.py +200 -0
  173. package/scripts/reddit_threads_helper.py +151 -0
  174. package/scripts/reddit_tools.py +956 -0
  175. package/scripts/refresh_instagram_tokens.py +280 -0
  176. package/scripts/release-mcpb.sh +497 -0
  177. package/scripts/reply_db.py +334 -0
  178. package/scripts/reply_insert.py +98 -0
  179. package/scripts/reply_risk_digest.py +761 -0
  180. package/scripts/reset-test-machine.sh +602 -0
  181. package/scripts/restore_twitter_session.py +177 -0
  182. package/scripts/ripen_reddit_plan.py +478 -0
  183. package/scripts/run_claude.sh +433 -0
  184. package/scripts/run_moltbook_cycle.py +555 -0
  185. package/scripts/s4l_box_update.sh +226 -0
  186. package/scripts/s4l_channel.py +103 -0
  187. package/scripts/s4l_ctl.sh +75 -0
  188. package/scripts/s4l_env.py +47 -0
  189. package/scripts/saps_activity.py +126 -0
  190. package/scripts/saps_mode.py +328 -0
  191. package/scripts/scan_dm_candidates.py +580 -0
  192. package/scripts/scan_github_replies.py +168 -0
  193. package/scripts/scan_instagram_comments.py +481 -0
  194. package/scripts/scan_moltbook_replies.py +252 -0
  195. package/scripts/scan_pii.py +190 -0
  196. package/scripts/scan_reddit_replies.py +377 -0
  197. package/scripts/scan_twitter_mentions_browser.py +327 -0
  198. package/scripts/scan_twitter_thread_followups.py +299 -0
  199. package/scripts/scan_x_profile.py +384 -0
  200. package/scripts/schedule_state.py +202 -0
  201. package/scripts/scheduled_tasks_snapshot.py +123 -0
  202. package/scripts/score_linkedin_candidates.py +419 -0
  203. package/scripts/score_twitter_candidates.py +718 -0
  204. package/scripts/scrape_linkedin_comment_stats.py +1755 -0
  205. package/scripts/scrape_linkedin_stats_browser.py +52 -0
  206. package/scripts/scrape_reddit_views.py +365 -0
  207. package/scripts/seed_search_queries.py +453 -0
  208. package/scripts/seed_search_topics.py +127 -0
  209. package/scripts/send_web_chat_reply.py +130 -0
  210. package/scripts/sentry_init.py +128 -0
  211. package/scripts/setup_twitter_auth.py +1320 -0
  212. package/scripts/snapshot.py +583 -0
  213. package/scripts/stats.py +2702 -0
  214. package/scripts/stats_helper.py +52 -0
  215. package/scripts/strike_alert.py +783 -0
  216. package/scripts/sweep_post_link_clicks.py +107 -0
  217. package/scripts/sync_ig_to_posts.py +147 -0
  218. package/scripts/test_browser_lock.py +189 -0
  219. package/scripts/test_installation_api.sh +52 -0
  220. package/scripts/test_percard_posting.py +142 -0
  221. package/scripts/top_dud_linkedin_queries.py +71 -0
  222. package/scripts/top_dud_reddit_queries.py +67 -0
  223. package/scripts/top_dud_twitter_queries.py +71 -0
  224. package/scripts/top_dud_twitter_topics.py +102 -0
  225. package/scripts/top_linkedin_queries.py +55 -0
  226. package/scripts/top_omitted_reddit_topics.py +91 -0
  227. package/scripts/top_performers.py +588 -0
  228. package/scripts/top_search_topics.py +180 -0
  229. package/scripts/top_twitter_queries.py +190 -0
  230. package/scripts/twitter_access_check.py +382 -0
  231. package/scripts/twitter_account.py +41 -0
  232. package/scripts/twitter_batch_phase.py +126 -0
  233. package/scripts/twitter_browser.py +2804 -0
  234. package/scripts/twitter_cookie_mirror.py +130 -0
  235. package/scripts/twitter_cycle_helper.py +310 -0
  236. package/scripts/twitter_gen_links.py +287 -0
  237. package/scripts/twitter_post_plan.py +1188 -0
  238. package/scripts/twitter_scan.py +324 -0
  239. package/scripts/twitter_supply_signal.py +57 -0
  240. package/scripts/twitter_threads_helper.py +152 -0
  241. package/scripts/unclaim_web_chat.py +29 -0
  242. package/scripts/update_instagram_stats.py +261 -0
  243. package/scripts/update_linkedin_stats_from_feed.py +328 -0
  244. package/scripts/version.py +72 -0
  245. package/scripts/watchdog_hung_runs.py +343 -0
  246. package/scripts/write_generation_trace.py +73 -0
  247. package/setup/SKILL.md +277 -0
  248. package/skill/amplitude-24h-signups.sh +38 -0
  249. package/skill/archive-old-logs.sh +40 -0
  250. package/skill/audit-dm-staleness.sh +42 -0
  251. package/skill/audit-linkedin.sh +14 -0
  252. package/skill/audit-moltbook.sh +4 -0
  253. package/skill/audit-reddit-resurrect.sh +67 -0
  254. package/skill/audit-reddit.sh +4 -0
  255. package/skill/audit-twitter.sh +4 -0
  256. package/skill/audit.sh +287 -0
  257. package/skill/backfill-twitter-attempts-topic.sh +19 -0
  258. package/skill/backfill-twitter-ghost-posts.sh +24 -0
  259. package/skill/check-external-pool-depth.sh +7 -0
  260. package/skill/check-web-chats.sh +203 -0
  261. package/skill/dm-outreach-linkedin.sh +250 -0
  262. package/skill/dm-outreach-reddit.sh +274 -0
  263. package/skill/dm-outreach-twitter.sh +265 -0
  264. package/skill/engage-dm-replies-linkedin.sh +4 -0
  265. package/skill/engage-dm-replies-reddit.sh +4 -0
  266. package/skill/engage-dm-replies-twitter.sh +4 -0
  267. package/skill/engage-dm-replies.sh +1597 -0
  268. package/skill/engage-linkedin.sh +581 -0
  269. package/skill/engage-moltbook.sh +36 -0
  270. package/skill/engage-reddit.sh +146 -0
  271. package/skill/engage-twitter.sh +467 -0
  272. package/skill/github-engage.sh +176 -0
  273. package/skill/ingest-web-chat-replies.sh +38 -0
  274. package/skill/invent-supply-test.sh +100 -0
  275. package/skill/invent-topics.sh +50 -0
  276. package/skill/lib/linkedin-backend.sh +364 -0
  277. package/skill/lib/platform.sh +48 -0
  278. package/skill/lib/reddit-backend.sh +234 -0
  279. package/skill/lib/twitter-backend.sh +314 -0
  280. package/skill/link-edit-github.sh +136 -0
  281. package/skill/link-edit-moltbook.sh +117 -0
  282. package/skill/link-edit-reddit.sh +201 -0
  283. package/skill/linkedin-presence.sh +182 -0
  284. package/skill/linkedin-recovery.sh +282 -0
  285. package/skill/lock.sh +647 -0
  286. package/skill/memory-snapshot.sh +39 -0
  287. package/skill/precompute-stats.sh +35 -0
  288. package/skill/prewarm-funnel.sh +104 -0
  289. package/skill/refresh-instagram-tokens.sh +57 -0
  290. package/skill/refresh-twitter-following.sh +52 -0
  291. package/skill/reply-risk-digest.sh +31 -0
  292. package/skill/run-cycle-update-guard.sh +44 -0
  293. package/skill/run-draft-and-publish.sh +123 -0
  294. package/skill/run-generate-daily-style.sh +50 -0
  295. package/skill/run-github-launchd.sh +62 -0
  296. package/skill/run-github.sh +102 -0
  297. package/skill/run-instagram-daily.sh +149 -0
  298. package/skill/run-instagram-render.sh +875 -0
  299. package/skill/run-linkedin-launchd.sh +81 -0
  300. package/skill/run-linkedin-unipile.sh +130 -0
  301. package/skill/run-linkedin.sh +1593 -0
  302. package/skill/run-moltbook-launchd.sh +61 -0
  303. package/skill/run-moltbook.sh +38 -0
  304. package/skill/run-overlay-watch.sh +100 -0
  305. package/skill/run-reddit-search-launchd.sh +64 -0
  306. package/skill/run-reddit-search.sh +505 -0
  307. package/skill/run-reddit-threads-double.sh +32 -0
  308. package/skill/run-reddit-threads.sh +847 -0
  309. package/skill/run-scan-moltbook-replies.sh +57 -0
  310. package/skill/run-twitter-cycle-launchd.sh +63 -0
  311. package/skill/run-twitter-cycle-singleton.sh +62 -0
  312. package/skill/run-twitter-cycle.sh +2408 -0
  313. package/skill/run-twitter-threads.sh +592 -0
  314. package/skill/scan-instagram-replies.sh +61 -0
  315. package/skill/scan-twitter-followups.sh +57 -0
  316. package/skill/social-autoposter-update.sh +66 -0
  317. package/skill/stats-instagram.sh +72 -0
  318. package/skill/stats-linkedin.sh +271 -0
  319. package/skill/stats-moltbook.sh +4 -0
  320. package/skill/stats-reddit.sh +4 -0
  321. package/skill/stats-twitter.sh +4 -0
  322. package/skill/stats.sh +521 -0
  323. package/skill/strike-alert.sh +18 -0
  324. package/skill/styles.sh +87 -0
  325. package/skill/sweep-link-clicks.sh +40 -0
  326. package/skill/topics.sh +51 -0
@@ -0,0 +1,984 @@
1
+ #!/usr/bin/env python3
2
+ """LinkedIn SERP discovery: read-only Phase A search-page scrape.
3
+
4
+ Usage:
5
+ python3 discover_linkedin_candidates.py <vertical> <query>
6
+ # vertical = people | content | companies
7
+
8
+ Attaches to the already-running LinkedIn Chrome via CDP and reuses its
9
+ existing BrowserContext. Same Chrome process, same cookies, same UA, same
10
+ fingerprint as whatever LinkedIn already trusts from the live session.
11
+ Opens our own page in that context, navigates to the SERP, runs ONE
12
+ page.evaluate() against the rendered DOM, closes our page, disconnects.
13
+
14
+ CDP endpoint resolution (see _resolve_cdp_url, 2026-05-29):
15
+ Lane 1 (preferred): LINKEDIN_CDP_URL — the browser-harness Chrome on
16
+ :9556, exported by skill/lib/linkedin-backend.sh. The main
17
+ run-linkedin.sh pipeline now drives that harness Chrome
18
+ (mcp__linkedin-harness__bh_run) instead of the retired
19
+ linkedin-agent MCP, so this is the live, logged-in session.
20
+ Lane 2 (legacy): http://localhost:<port> read from the linkedin-agent
21
+ MCP's DevToolsActivePort. Kept for running outside the harness.
22
+
23
+ Read-only DOM scrape: NO Voyager API, NO scroll-and-expand loops, NO
24
+ permalink fan-out, NO clicks/typing, NO programmatic login.
25
+
26
+ Pre-conditions for this to work:
27
+ 1. A LinkedIn Chrome is running and reachable via one of the two CDP
28
+ lanes above (normally the harness Chrome on :9556 launched by
29
+ ensure_linkedin_browser_for_backend).
30
+ 2. The user is logged in inside that browser. We do NOT log in.
31
+
32
+ Why CDP attach rather than launch_persistent_context: the previous version
33
+ launched its own Chrome against the shared profile dir. When LinkedIn
34
+ redirected the SERP request (UA mismatch / fresh-launch fingerprint) the
35
+ homepage response contained Set-Cookie headers that cleared li_at. On
36
+ context.close() Chrome flushed the cleared cookies to disk, logging the
37
+ shared profile out and breaking unread-dms + the linkedin-agent MCP.
38
+ Attaching to the MCP's running Chrome eliminates the launch fingerprint,
39
+ removes the cookie-flush risk (we never close the context), and keeps the
40
+ profile fully owned by one process at a time.
41
+
42
+ Per CLAUDE.md "LinkedIn: flagged patterns" carve-out (2026-04-29): the
43
+ read-only DOM read is permitted because the request runs inside the same
44
+ Chrome the MCP already drives. The 2026-04-17 restriction was caused by
45
+ Voyager calls + permalink scroll loops, neither of which appear here.
46
+
47
+ Historical (pre-2026-05-01): searches were rate-limited against
48
+ linkedin_browser_searches (~30s min gap, ~40/day, ~150/month soft cap under
49
+ LinkedIn's ~300/month commercial-use wall on free accounts) and failed CLOSED
50
+ on DB errors. Those caps are gone; see the note under "Failure shapes" below.
51
+
52
+ Output (stdout, JSON):
53
+ {
54
+ "ok": true,
55
+ "url": "https://www.linkedin.com/search/results/people/?keywords=...",
56
+ "vertical": "people",
57
+ "query": "founder rag retrieval",
58
+ "result_count": 10,
59
+ "results": [...],
60
+ "rate_budget": {"daily_used": N, "daily_cap": null,
61
+ "monthly_used": N, "monthly_cap": null},
62
+ }
63
+
64
+ Failure shapes:
65
+ {"ok": false, "error": "session_invalid", "url": "..."}
66
+ {"ok": false, "error": "serp_redirected", "url": "..."}
67
+ {"ok": false, "error": "mcp_not_running", "detail": "..."}
68
+ {"ok": false, "error": "cdp_attach_failed", "detail": "..."}
69
+ {"ok": false, "error": "navigation_failed", "detail": "..."}
70
+ {"ok": false, "error": "bad_vertical", "detail": "..."}
71
+ {"ok": false, "error": "empty_query", "detail": ""}
72
+
73
+ Note: rate_limited and db_unavailable are no longer raised. All caps were
74
+ removed 2026-05-01; the script logs to linkedin_browser_searches for
75
+ visibility but never refuses based on volume or recency.
76
+
77
+ Exits 0 on success, 1 on failure.
78
+ """
79
+
80
+ import json
81
+ import os
82
+ import random
83
+ import sys
84
+ import time
85
+ import urllib.parse
86
+ from typing import Optional
87
+
88
+
89
def _bh_activity_log(action: str, cdp_url: str) -> None:
    """Append one line to the universal browser-activity.log (Python-CDP path coverage).

    The log path is read from the BH_ACTIVITY_LOG environment variable and
    falls back to ~/.claude/browser-profiles/browser-activity.log. Each line
    records a timestamp, this script's file name, the action, pid/ppid, the
    CDP URL and the port parsed out of that URL.

    Best-effort by design: every failure (unwritable path, unresolvable home
    directory, ...) is swallowed so activity logging can never break the
    caller.

    Args:
        action: Short label describing what the caller is doing.
        cdp_url: CDP endpoint URL. Empty/None is logged as "-".
    """
    try:
        # Local imports keep this helper self-contained.
        import time as _t
        import os as _o
        from pathlib import Path as _P
        _p = _P(_o.environ.get(
            "BH_ACTIVITY_LOG",
            str(_P.home() / ".claude" / "browser-profiles" / "browser-activity.log"),
        ))
        # Whatever follows the last ":" (up to the next "/") is the port
        # candidate. Accept it only when it is numeric, so a URL without a
        # port logs "-" instead of a host fragment such as "localhost" or "1]".
        _tail = (cdp_url or "").rsplit(":", 1)[-1].split("/")[0]
        _port = _tail if _tail.isdigit() else "-"
        _p.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding: the log must not depend on the process locale.
        with _p.open("a", encoding="utf-8") as _f:
            _f.write(
                f"[{_t.strftime('%Y-%m-%d %H:%M:%S')}] pycdp "
                f"script={_o.path.basename(__file__)} action={action} "
                f"pid={_o.getpid()} ppid={_o.getppid()} cdp={cdp_url or '-'} "
                f"port={_port}\n"
            )
    except Exception:
        # Deliberate silent swallow: logging is diagnostics only.
        pass
110
+
111
+ # Reuse the lock helper + login-URL detector from linkedin_browser. We share
112
+ # the lock so concurrent Python helpers (search vs unread-dms) serialize on
113
+ # the same ~/.claude/linkedin-agent-lock.json. PROFILE_DIR also points at
114
+ # the directory where the linkedin-agent MCP writes DevToolsActivePort.
115
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
116
+ from linkedin_browser import ( # noqa: E402
117
+ PROFILE_DIR,
118
+ _acquire_browser_lock,
119
+ _is_login_or_checkpoint,
120
+ )
121
+ from score_linkedin_candidates import calculate_velocity_score # noqa: E402
122
try:  # author exclusion is fail-open: never let it break discovery
    from linkedin_exclusions import load_exclusions, classify_author  # noqa: E402
except Exception:  # pragma: no cover - helper missing -> exclusion becomes a no-op
    # Stand-ins bound to the same two names, so later code can call them
    # unconditionally whether or not the real helper module imported.
    def load_exclusions(platform="linkedin"):
        # Three empty sets: with nothing listed, nothing can be excluded.
        return {"hard_slugs": set(), "soft_slugs": set(), "soft_names": set()}

    def classify_author(author_name, author_profile_url, excl=None):
        # NOTE(review): presumably (None, "") means "no exclusion class, no
        # reason" -- confirm against linkedin_exclusions.classify_author.
        return None, ""
130
+ from http_api import api_get, api_post # noqa: E402
131
+
132
+ DEVTOOLS_ACTIVE_PORT = os.path.join(PROFILE_DIR, "DevToolsActivePort")
133
+
134
+ # Virality (velocity * reach_mult * age_decay * (1 + disc_bonus); see
135
+ # score_linkedin_candidates.calculate_velocity_score) is a RANKING signal,
136
+ # not a cutoff. Aligned with the Twitter model (2026-05-29): score every
137
+ # SERP card, sort by velocity_score DESC so the Phase A picker sees the
138
+ # strongest candidates first, and NEVER drop a card on virality. The picker
139
+ # prompt already steers toward the top of the sorted list and leans toward
140
+ # posting, so the old hard floor only caused zero-post cycles on quiet
141
+ # topics where every card scored low (e.g. niche backend-dev SERPs). Twitter
142
+ # (score_twitter_candidates.py) keeps every candidate the same way: "no cap,
143
+ # no cutoff: this only ever raises a score, never removes a candidate."
144
+
145
+ # Search rate-limit budget removed 2026-05-01 per user instruction. The
146
+ # linkedin_browser_searches table is kept so daily/monthly volumes remain
147
+ # observable, but no min-gap, daily, or monthly cap is enforced. Caller is
148
+ # responsible for cadence. The 2026-04-17 LinkedIn restriction (see CLAUDE.md
149
+ # "LinkedIn: flagged patterns") came from behavioral fingerprinting, not raw
150
+ # volume, so volume caps weren't the load-bearing protection anyway — but
151
+ # back-to-back machine-cadence search hits are now structurally possible
152
+ # from this script.
153
+ SEARCH_VERTICALS = ("people", "content", "companies")
154
+
155
+
156
def _check_rate_limit() -> dict:
    """Report current search volume; never refuses (caps removed 2026-05-01).

    Fetches daily/monthly usage from GET /api/v1/linkedin-browser-searches so
    the response keeps its rate_budget block populated for the dashboard. The
    result always carries ok=True: with no cap to enforce, a failed read only
    means the affected counters stay at 0 and the search proceeds anyway.

    Migrated 2026-06-01 from a direct DB read (+ first-use CREATE TABLE) to
    the HTTP API. The table now lives in the social-autoposter-website
    schema; no client-side DDL needed.

    Returns:
        {"ok": True, "daily_used": int, "monthly_used": int}
    """
    usage = {"ok": True, "daily_used": 0, "monthly_used": 0}
    try:
        payload = api_get("/api/v1/linkedin-browser-searches").get("data") or {}
        usage["daily_used"] = int(payload.get("daily_used") or 0)
        usage["monthly_used"] = int(payload.get("monthly_used") or 0)
    except (Exception, SystemExit):
        # API down? Not our problem -- caps are off, search proceeds.
        pass
    return usage
177
+
178
+
179
def _log_search(query: str, vertical: str, ok: bool, error: Optional[str]) -> None:
    """Best-effort write of one row to linkedin_browser_searches.

    Posts the search outcome to POST /api/v1/linkedin-browser-searches
    (migrated from a direct DB write on 2026-06-01).

    Never raises: a failed log must not turn a successful search into a
    failure. SystemExit is caught alongside Exception because http_api raises
    it on 4xx / exhausted retries. The only consequence of a lost row is that
    the volume counters read back later under-count; the failure is reported
    on stderr.

    Args:
        query: Search keywords that were used.
        vertical: SERP vertical the search ran against.
        ok: Whether the search succeeded.
        error: Error code for a failed search, or None.
    """
    row = {"query": query, "vertical": vertical, "ok": ok, "error": error}
    try:
        api_post("/api/v1/linkedin-browser-searches", row)
    except (Exception, SystemExit) as e:
        message = f"[discover_linkedin_candidates] _log_search: post failed: {e}"
        print(message, file=sys.stderr)
201
+
202
+
203
+ # DOM extractors per vertical. Each is a single querySelectorAll + map, with
204
+ # multiple selector fallbacks because LinkedIn's class names rotate. Returns
205
+ # JSON.stringify(...) so the Python side can json.loads regardless of how the
206
+ # evaluate channel marshals nested objects. Limit to the first 25 cards on the
207
+ # page — anything beyond that requires scrolling, which we explicitly do not
208
+ # do.
209
+ #
210
+ # Provenance:
211
+ # _SEARCH_JS_CONTENT — lifted from skill/run-linkedin.sh (production-tested).
212
+ # _SEARCH_JS_PEOPLE — UNVERIFIED. Selectors based on widely-documented
213
+ # LinkedIn patterns + multiple fallbacks. Smoke-test
214
+ # against a real SERP before relying on the output.
215
+ # _SEARCH_JS_COMPANIES — UNVERIFIED. Same caveat as people.
216
+ #
217
+ # Reconciliation procedure for the UNVERIFIED extractors (do this once,
218
+ # then update this block to mark them VERIFIED with the date):
219
+ # Preconditions:
220
+ # - linkedin-agent has been idle >= 1 hour. Check
221
+ # ~/.playwright-mcp/linkedin-agent/page-*.yml mtimes.
222
+ # - The persistent profile is logged in. Do NOT trigger a probe like
223
+ # "navigate to LinkedIn and tell me what you see" — that prompt itself
224
+ # is the high-risk behavior that invalidated cookies on 2026-04-29.
225
+ # If the session is dead, wait for the next normal pipeline cycle to
226
+ # re-auth, then resume reconciliation in a fresh hour.
227
+ # Steps (use the linkedin-agent MCP, NOT this script — the script logs to
228
+ # linkedin_browser_searches and burns the rate budget for nothing):
229
+ # 1. mcp__linkedin-agent__browser_navigate to
230
+ # https://www.linkedin.com/search/results/people/?keywords=founder%20ai
231
+ # 2. mcp__linkedin-agent__browser_evaluate, paste _SEARCH_JS_PEOPLE
232
+ # verbatim (including the JSON.stringify wrap). JSON.parse the
233
+ # returned string.
234
+ # Accept criterion: >= 5 entries with non-empty name AND profile_url.
235
+ # Reject: [] or rows with all-empty fields → snapshot the page,
236
+ # find the live card class names, patch the querySelectorAll lists.
237
+ # Keep existing fallback selectors at the END of each list to stay
238
+ # compatible with the older layout.
239
+ # 3. Repeat for /search/results/companies/?keywords=founder%20ai with
240
+ # _SEARCH_JS_COMPANIES. Same accept criterion (>= 5 cards with
241
+ # company AND company_url).
242
+ # Hard limits during reconciliation: 2 navigations total, no
243
+ # close-and-reopen of the agent, no scroll, no clicks. Anything more is
244
+ # the same fingerprint pattern that triggered the 2026-04-29 lockouts.
245
# People-vertical SERP extractor, passed to page.evaluate(). Reads at most the
# first 25 result cards and returns JSON.stringify([...]) of
# {name, headline, location, profile_url}. Cards with neither a name nor a
# profile URL are dropped. Selectors are still UNVERIFIED against a live SERP.
_SEARCH_JS_PEOPLE = r"""
() => {
  const out = [];
  const cards = document.querySelectorAll(
    "div.search-results-container li div.entity-result, "
    + "li.reusable-search__result-container, "
    + "[data-chameleon-result-urn]"
  );
  for (const c of Array.from(cards).slice(0, 25)) {
    const link = c.querySelector(
      "a[href*='/in/'].app-aware-link, a[href*='/in/']"
    );
    const profileUrl = link
      ? (link.href || link.getAttribute("href") || "")
      : "";
    const nameEl = c.querySelector(
      ".entity-result__title-text, .entity-result__title-line, "
      + "span[aria-hidden='true']"
    );
    const name = nameEl ? (nameEl.textContent || "").trim() : "";
    const headlineEl = c.querySelector(
      ".entity-result__primary-subtitle, .t-14.t-black.t-normal"
    );
    const headline = headlineEl
      ? (headlineEl.textContent || "").trim() : "";
    // querySelector returns the first match in DOCUMENT order across a
    // selector list, and ".t-14.t-normal" also matches the headline's
    // ".t-14.t-black.t-normal". Try the specific class first, then a
    // fallback that excludes the headline, so location is never a copy of
    // the headline.
    const locEl =
      c.querySelector(".entity-result__secondary-subtitle")
      || c.querySelector(".t-14.t-normal:not(.t-black)");
    const location = locEl ? (locEl.textContent || "").trim() : "";
    if (!name && !profileUrl) continue;
    out.push({
      name: name.replace(/\s+/g, " "),
      headline: headline.replace(/\s+/g, " "),
      location: location.replace(/\s+/g, " "),
      profile_url: profileUrl.split("?")[0],
    });
  }
  return JSON.stringify(out);
}
"""
285
+
286
# Content-search extractor. Two layouts coexist in the wild:
#
# New SDUI layout (post 2026-04-30 reconciliation): obfuscated class names,
# results wrapped in [data-sdui-screen*="SearchResultsContent"], each card
# [role="listitem"][componentkey]. The activity URN is GONE from the DOM
# for most cards: only cards that embed a quoted/reposted share keep a
# visible /feed/update/<urn> link. So post_url/activity_id can legitimately
# be null on the new layout — callers must dedupe by
# (author_profile_url, post_text hash) when activity_id is missing.
#
# Legacy class layout (pre-rollout, may still appear): div.feed-shared-update-v2
# / div[data-urn=...] cards with full URNs.
#
# Tries the new layout first, falls back to legacy, returns the same shape
# either way. Verified 2026-04-30 against
# /search/results/content/?keywords=ai%20agent%20founder
# 8/8 cards extracted (author_name + author_profile_url + post_text + age_text);
# 1/8 had activity_id (the only embedded-share case).
#
# Output: a JSON *string* (JSON.stringify of an array), one object per kept
# card, with keys post_url, activity_id, all_urns, author_name,
# author_headline, author_profile_url, author_followers, post_text,
# age_hours, reactions, comments, reposts, age_text, comment_gated.
# Only the first 25 matched cards are inspected. A card is skipped when its
# activity_id was already emitted by an earlier card, or when it yields no
# author name, no author URL and no post text. age_hours is a relative age
# converted to hours (null when no age token is found); the three counts
# default to 0 when no count widget / aria-label matches.
_SEARCH_JS_CONTENT = r"""
() => {
  const out = [];

  function parseRelativeAge(txt) {
    if (!txt) return null;
    const m = txt.match(/(\d+)\s*(s|min|m|hr|h|d|w|mo|y)\b/i);
    if (!m) return null;
    const n = parseInt(m[1], 10);
    let u = m[2].toLowerCase();
    if (u === 'hr') u = 'h';
    if (u === 'min') u = 'm';
    const map = { s: 1/3600, m: 1/60, h: 1, d: 24, w: 24*7, mo: 24*30, y: 24*365 };
    return n * (map[u] || 0);
  }
  function parseCount(txt) {
    if (!txt) return 0;
    const t = String(txt).replace(/,/g, '').trim();
    const m = t.match(/([\d.]+)\s*([KkMm]?)/);
    if (!m) return 0;
    const n = parseFloat(m[1]);
    const u = (m[2] || '').toLowerCase();
    return Math.round(n * (u === 'k' ? 1000 : u === 'm' ? 1_000_000 : 1));
  }

  // 1. New SDUI layout.
  let items = [];
  const screen = document.querySelector('[data-sdui-screen*="SearchResultsContent"]');
  if (screen) {
    items = Array.from(screen.querySelectorAll('[role="listitem"][componentkey]'));
  }
  // 2. Legacy fallback.
  if (items.length === 0) {
    items = Array.from(document.querySelectorAll(
      'div.feed-shared-update-v2, '
      + 'div[data-urn*="urn:li:activity"], '
      + 'div[data-urn*="urn:li:share"], '
      + 'div[data-urn*="urn:li:ugcPost"]'
    ));
  }

  const seen = new Set();
  const urnRe = /urn:li:(activity|share|ugcPost):(\d{16,19})/;
  const urnReG = /urn:li:(activity|share|ugcPost):(\d{16,19})/g;

  for (const item of items.slice(0, 25)) {
    let urnType = null, activityId = null;
    const allUrns = new Set();

    const updateLink = item.querySelector('a[href*="/feed/update/"]');
    if (updateLink) {
      const m = (updateLink.href || '').match(urnRe);
      if (m) { urnType = m[1]; activityId = m[2]; allUrns.add(m[2]); }
    }
    if (!activityId) {
      const dataUrn = item.getAttribute('data-urn') || '';
      const m = dataUrn.match(urnRe);
      if (m) { urnType = m[1]; activityId = m[2]; allUrns.add(m[2]); }
    }
    if (!activityId) {
      const html = item.outerHTML || '';
      let mm;
      urnReG.lastIndex = 0;
      while ((mm = urnReG.exec(html)) !== null) {
        allUrns.add(mm[2]);
        if (!activityId) { urnType = mm[1]; activityId = mm[2]; }
      }
    }
    if (activityId) {
      if (seen.has(activityId)) continue;
      seen.add(activityId);
    }

    const authorLink = item.querySelector('a[aria-label*="profile" i][href*="/in/"]')
      || item.querySelector('a[href*="/in/"]');
    const authorUrl = authorLink ? (authorLink.href || '').split('?')[0] : null;
    let authorName = null;
    if (authorLink) {
      const al = authorLink.getAttribute('aria-label') || '';
      const m = al.match(/View\s+(.+?)['’]s\s+profile/i);
      if (m) authorName = m[1].trim();
    }
    // The new SDUI layout puts the View-profile aria on an inner <svg>, not
    // the <a>. Probe descendants of the link too before falling back.
    if (!authorName && authorLink) {
      const inner = authorLink.querySelector('[aria-label*="profile" i]');
      if (inner) {
        const m = (inner.getAttribute('aria-label') || '').match(/View\s+(.+?)['’]s\s+profile/i);
        if (m) authorName = m[1].trim();
      }
    }
    if (!authorName) {
      const followBtn = item.querySelector('button[aria-label^="Follow "]');
      if (followBtn) {
        const m = (followBtn.getAttribute('aria-label') || '').match(/^Follow\s+(.+)$/i);
        if (m) authorName = m[1].trim();
      }
    }
    if (!authorName) {
      const nameEl = item.querySelector(
        '.update-components-actor__name, span.feed-shared-actor__name'
      );
      if (nameEl) authorName = (nameEl.textContent || '').trim();
    }

    let authorFollowers = null;
    const supplementary = item.querySelector(
      '.update-components-actor__supplementary-actor-info, '
      + '.feed-shared-actor__sub-description'
    );
    if (supplementary) {
      const fm = (supplementary.textContent || '').match(/([\d.,]+[KkMm]?)\s*follower/);
      if (fm) authorFollowers = parseCount(fm[1]);
    }

    // Actor block = the prefix of the listitem text before "• Follow". On the
    // new SDUI layout it has the shape "Feed post<NAME> • <CONNECTION><HEADLINE><AGE>".
    const fullItemText = (item.textContent || '').replace(/\s+/g, ' ').trim();
    const followIdx0 = fullItemText.indexOf('• Follow');
    const actorBlock = followIdx0 >= 0 ? fullItemText.slice(0, followIdx0) : fullItemText.slice(0, 300);

    // Author headline: strip "Feed post" prefix, the name, the connection
    // marker, and the trailing age. Best-effort; for company pages or
    // non-standard layouts (no • <connection>) we still return whatever's
    // left after the name.
    let authorHeadline = null;
    {
      let h = actorBlock.replace(/^Feed post/, '').trim();
      if (authorName && h.startsWith(authorName)) h = h.slice(authorName.length);
      h = h.replace(/^\s*•\s*(1st|2nd|3rd\+?|Out of network|Following)\s*/i, '');
      h = h.replace(/\s*(?:•\s*)?\d+\s*(?:s|min|m|hr|h|d|w|mo|y)\s*$/i, '');
      h = h.trim();
      if (h) authorHeadline = h;
    }

    // Post body. Legacy: prefer the dedicated text element. New SDUI: take
    // text after "• Follow", then strip trailing CTA / count noise.
    let postText = '';
    const textEl = item.querySelector(
      '.update-components-text, .feed-shared-update-v2__description, span.break-words'
    );
    if (textEl) {
      postText = (textEl.textContent || '').replace(/\s+/g, ' ').trim();
    } else {
      let s = fullItemText.replace(/^Feed post/, '').trim();
      const idx = s.indexOf('• Follow');
      if (idx >= 0) s = s.slice(idx + '• Follow'.length).trim();
      // Strip trailing "… more" / "...more" the new layout appends.
      s = s.replace(/\s*[…\.]+\s*more\s*$/i, '').trim();
      // Strip trailing count noise like "+132 comments23 reactions",
      // "1 comment1", "+811 reaction", "23 reactions".
      // Count widgets concatenate without delimiters; consume runs greedily.
      for (let i = 0; i < 6; i++) {
        const before = s;
        s = s.replace(
          /\s*\+?\s*\d+\s*(?:reactions?|comments?|reposts?)\s*\d*\s*$/i,
          ''
        ).trim();
        if (s === before) break;
      }
      // Strip a stray trailing digit (artifact of glued-in count widgets).
      s = s.replace(/\s+\d+\s*$/, '').trim();
      postText = s;
    }

    let ageText = '';
    const timeEl = item.querySelector(
      'time, .update-components-actor__sub-description, '
      + 'span.feed-shared-actor__sub-description'
    );
    if (timeEl) ageText = (timeEl.textContent || '').trim();
    if (!ageText) {
      const ageM = fullItemText.match(/(\d+\s*(?:s|min|m|hr|h|d|w|mo|y))\b/i);
      if (ageM) ageText = ageM[1];
    }
    const ageHours = parseRelativeAge(ageText);

    // Counts. New SDUI hides counts from button aria-labels and embeds them
    // as plain leaf-divs ("1 comment", "23 reactions", "+811 reaction").
    // We walk every leaf div/span and match the strict shape; we keep the
    // max in case the same widget is mirrored across nested wrappers.
    let reactions = 0, comments = 0, reposts = 0;
    item.querySelectorAll('div, span').forEach(el => {
      if (el.children.length > 0) return;
      const t = (el.textContent || '').trim();
      if (!t || t.length > 30) return;
      let m;
      if ((m = t.match(/^[+]?\s*([\d.,]+\s*[KkMm]?)\s+reactions?$/i))) {
        const v = parseCount(m[1]);
        if (v > reactions) reactions = v;
      }
      if ((m = t.match(/^[+]?\s*([\d.,]+\s*[KkMm]?)\s+comments?$/i))) {
        const v = parseCount(m[1]);
        if (v > comments) comments = v;
      }
      if ((m = t.match(/^[+]?\s*([\d.,]+\s*[KkMm]?)\s+reposts?$/i))) {
        const v = parseCount(m[1]);
        if (v > reposts) reposts = v;
      }
    });
    // Legacy fallbacks (unchanged): aria-label-based counts on the old layout.
    if (reactions === 0) {
      const reactEl = item.querySelector(
        '[aria-label*=" reaction" i], '
        + '.social-details-social-counts__reactions-count'
      );
      if (reactEl) {
        const m = (reactEl.getAttribute('aria-label') || reactEl.textContent || '')
          .match(/([\d.,]+\s*[KkMm]?)\s*reaction/i);
        if (m) reactions = parseCount(m[1]);
      }
    }
    if (comments === 0) {
      const commentEl = item.querySelector(
        '[aria-label*=" comment" i], '
        + 'li.social-details-social-counts__comments'
      );
      if (commentEl) {
        const m = (commentEl.getAttribute('aria-label') || commentEl.textContent || '')
          .match(/([\d.,]+\s*[KkMm]?)\s*comment/i);
        if (m) comments = parseCount(m[1]);
      }
    }
    if (reposts === 0) {
      const repostEl = item.querySelector(
        '[aria-label*=" repost" i], '
        + 'li.social-details-social-counts__item--right-aligned'
      );
      if (repostEl) {
        const m = (repostEl.getAttribute('aria-label') || repostEl.textContent || '')
          .match(/([\d.,]+\s*[KkMm]?)\s*repost/i);
        if (m) reposts = parseCount(m[1]);
      }
    }

    if (!authorName && !authorUrl && !postText) continue;

    // Comment gate: the author restricted commenting to their connections
    // ("Only connections can comment on this post. You can still react or
    // share it."). For a 3rd+ degree account the comment editor never renders,
    // so the post is uncommentable. LinkedIn paints this notice straight into
    // the card chrome, so we can detect it here (Phase A) and drop the
    // candidate before it ever reaches the expensive compose/post phase.
    const commentGated =
      /only connections can comment on this post|you can still react or share it/i
        .test(fullItemText);

    out.push({
      post_url: activityId
        ? ('https://www.linkedin.com/feed/update/urn:li:' + urnType + ':' + activityId + '/')
        : null,
      activity_id: activityId,
      all_urns: Array.from(allUrns),
      author_name: authorName || null,
      author_headline: authorHeadline,
      author_profile_url: authorUrl,
      author_followers: authorFollowers,
      post_text: postText,
      age_hours: ageHours,
      reactions: reactions,
      comments: comments,
      reposts: reposts,
      age_text: ageText,
      comment_gated: commentGated
    });
  }
  return JSON.stringify(out);
}
"""
573
+
574
# Company-search extractor. Inspects at most the first 25 result cards
# matched by the entity-result / reusable-search / data-chameleon-result-urn
# selectors and returns a JSON *string*: an array of objects with keys
# company, tagline and company_url. Whitespace runs in company/tagline are
# collapsed to single spaces and the query string is stripped from the URL.
# Cards that yield neither a name nor a URL are skipped.
_SEARCH_JS_COMPANIES = r"""
() => {
  const out = [];
  const cards = document.querySelectorAll(
    "div.search-results-container li div.entity-result, "
    + "li.reusable-search__result-container, "
    + "[data-chameleon-result-urn]"
  );
  for (const c of Array.from(cards).slice(0, 25)) {
    const link = c.querySelector(
      "a[href*='/company/'].app-aware-link, a[href*='/company/']"
    );
    const url = link ? (link.href || link.getAttribute("href") || "") : "";
    const nameEl = c.querySelector(
      ".entity-result__title-text, .entity-result__title-line, "
      + "span[aria-hidden='true']"
    );
    const name = nameEl ? (nameEl.textContent || "").trim() : "";
    const taglineEl = c.querySelector(
      ".entity-result__primary-subtitle, .t-14.t-black.t-normal"
    );
    const tagline = taglineEl ? (taglineEl.textContent || "").trim() : "";
    if (!name && !url) continue;
    out.push({
      company: name.replace(/\s+/g, " "),
      tagline: tagline.replace(/\s+/g, " "),
      company_url: url.split("?")[0],
    });
  }
  return JSON.stringify(out);
}
"""

# Extractor script per search vertical. search() evaluates
# _SEARCH_JS_BY_VERTICAL[vertical] in the page after validating the vertical
# against SEARCH_VERTICALS.
# NOTE(review): the keys here presumably mirror SEARCH_VERTICALS — confirm,
# since a vertical accepted there but missing here would raise KeyError.
_SEARCH_JS_BY_VERTICAL = {
    "people": _SEARCH_JS_PEOPLE,
    "content": _SEARCH_JS_CONTENT,
    "companies": _SEARCH_JS_COMPANIES,
}
612
+
613
+
614
+ def _read_devtools_port() -> Optional[int]:
615
+ """Return the CDP port the linkedin-agent MCP's Chrome is listening on,
616
+ or None if the file is missing/unreadable/stale. Chrome writes the port
617
+ on line 1 of DevToolsActivePort when launched with --remote-debugging-port.
618
+
619
+ Chrome SHOULD remove the file when it exits, but doesn't always — a
620
+ crashed/killed Chrome leaves a stale file pointing at a port nothing's
621
+ listening on. We probe the port with a non-blocking TCP connect; if the
622
+ connection is refused, we treat the file as stale and return None so
623
+ callers report the cleaner mcp_not_running error rather than dragging
624
+ out to a noisy cdp_attach_failed."""
625
+ try:
626
+ with open(DEVTOOLS_ACTIVE_PORT) as f:
627
+ port = int(f.readline().strip())
628
+ if port <= 0:
629
+ return None
630
+ except (OSError, ValueError):
631
+ return None
632
+ import socket
633
+ try:
634
+ with socket.create_connection(("127.0.0.1", port), timeout=0.5):
635
+ return port
636
+ except (OSError, socket.timeout):
637
+ return None
638
+
639
+
640
+ def _resolve_cdp_url() -> Optional[str]:
641
+ """Resolve the CDP endpoint to attach the SERP read to.
642
+
643
+ Lane 1 (preferred, 2026-05-29): LINKEDIN_CDP_URL, exported by
644
+ skill/lib/linkedin-backend.sh to point at the browser-harness Chrome on
645
+ :9556. The main run-linkedin.sh pipeline now drives that harness Chrome
646
+ (mcp__linkedin-harness__bh_run) instead of the retired linkedin-agent
647
+ MCP, so this is the live session whose cookies/fingerprint we want. We
648
+ probe /json/version with a 1s GET so a stale/unset env falls through
649
+ cleanly rather than dragging into a noisy connect failure.
650
+
651
+ Lane 2 (legacy DevToolsActivePort attach to the linkedin-agent profile
652
+ under PROFILE_DIR) was REMOVED 2026-05-31. It silently sent the SERP read
653
+ to a SECOND Chrome (the retired linkedin-agent MCP browser) whenever the
654
+ harness was momentarily unreachable — the "two LinkedIn browsers in
655
+ parallel" bug. The harness Chrome on :9556 is now the ONLY allowed target.
656
+
657
+ Returns the harness CDP base URL (e.g. "http://127.0.0.1:9556") or None
658
+ when LINKEDIN_CDP_URL is unset or the harness is unreachable.
659
+ """
660
+ harness = os.environ.get("LINKEDIN_CDP_URL", "").strip()
661
+ if not harness:
662
+ return None
663
+ import urllib.request
664
+ try:
665
+ with urllib.request.urlopen(
666
+ f"{harness.rstrip('/')}/json/version", timeout=1.0
667
+ ):
668
+ return harness.rstrip("/")
669
+ except Exception:
670
+ return None
671
+
672
+
673
def _disconnect_quietly(browser) -> None:
    """Detach from a CDP-attached browser, ignoring any error while doing so."""
    # NOTE(review): Playwright's sync Browser API documents close(), not
    # disconnect() — confirm this method exists in the pinned Playwright
    # version. Either way the call is best-effort: leaving the
    # sync_playwright() block tears down the connection.
    try:
        browser.disconnect()
    except Exception:
        pass


def search(vertical: str, query: str) -> dict:
    """Attach to the LinkedIn browser-harness Chrome via CDP and read one SERP.

    ONE goto, ONE evaluate. No own-Chrome launch, no context.close(),
    so we never write cookies back to disk. Rate-limited against
    linkedin_browser_searches; fails closed if the DB budget is exhausted.

    Args:
        vertical: One of SEARCH_VERTICALS; selects the SERP path and the
            extractor script from _SEARCH_JS_BY_VERTICAL.
        query: Free-text keywords; surrounding whitespace is stripped.

    Returns:
        A dict that always carries "ok". On success it also carries "url",
        "vertical", "query", "result_count", "results", the dropped_*
        counters and "rate_budget". On failure "error" is one of
        bad_vertical, empty_query, mcp_not_running, cdp_attach_failed,
        navigation_failed, session_invalid or serp_redirected — or whatever
        _check_rate_limit() returned when it did not report ok.
    """
    if vertical not in SEARCH_VERTICALS:
        return {
            "ok": False,
            "error": "bad_vertical",
            "detail": f"got {vertical!r}; want one of {SEARCH_VERTICALS}",
        }
    query = (query or "").strip()
    if not query:
        return {"ok": False, "error": "empty_query", "detail": ""}

    rate = _check_rate_limit()
    if not rate.get("ok"):
        return rate

    cdp_url = _resolve_cdp_url()
    if cdp_url is None:
        # _resolve_cdp_url only honours LINKEDIN_CDP_URL (the legacy
        # DevToolsActivePort lane was removed), so the message must not
        # send operators chasing a lane that can no longer work.
        return {
            "ok": False,
            "error": "mcp_not_running",
            "detail": (
                "No LinkedIn CDP endpoint reachable. Set LINKEDIN_CDP_URL "
                "(the browser-harness Chrome on :9556, exported by "
                "skill/lib/linkedin-backend.sh) and make sure that Chrome is "
                "running. The legacy DevToolsActivePort lane was removed; "
                "the harness Chrome is the only supported target."
            ),
        }

    from playwright.sync_api import sync_playwright

    _acquire_browser_lock()

    encoded = urllib.parse.quote(query)
    # Content searches sort by date_posted to match skill/run-linkedin.sh
    # Phase A behavior — fresh posts > stale ones for engagement work.
    suffix = "&sortBy=date_posted" if vertical == "content" else ""
    search_url = (
        f"https://www.linkedin.com/search/results/{vertical}/"
        f"?keywords={encoded}{suffix}"
    )
    serp_prefix = f"https://www.linkedin.com/search/results/{vertical}/"

    with sync_playwright() as p:
        try:
            browser = p.chromium.connect_over_cdp(cdp_url)
        except Exception as e:
            _log_search(query, vertical, ok=False, error="cdp_attach_failed")
            return {
                "ok": False,
                "error": "cdp_attach_failed",
                "detail": f"connect_over_cdp({cdp_url}) failed: {e}",
            }

        _bh_activity_log("attach", cdp_url)

        # Reuse the existing context (cookies / UA / fingerprint already set
        # by whoever launched the Chrome). Never close it — that would kill
        # the owner's pages too. We only own a page we create below.
        if not browser.contexts:
            # Best-effort detach: an error here must not replace the
            # structured cdp_attach_failed result with an exception.
            _disconnect_quietly(browser)
            _log_search(query, vertical, ok=False, error="cdp_attach_failed")
            return {
                "ok": False,
                "error": "cdp_attach_failed",
                "detail": "browser.contexts is empty; MCP has no open context",
            }
        context = browser.contexts[0]

        page = None
        _reused_page = False
        try:
            # Reuse an existing harness tab instead of spawning a throwaway one
            # (mirrors reddit_browser / linkedin_browser). Prefer a tab already
            # on linkedin.com (not login/checkpoint), else the first open page;
            # only new_page() when the context has no usable tab. A reused tab
            # is left open in the finally below so the next consumer reuses it.
            for pg in context.pages:
                u = pg.url or ""
                if "linkedin.com" in u and "login" not in u and "checkpoint" not in u:
                    page, _reused_page = pg, True
                    break
            if page is None and context.pages:
                page, _reused_page = context.pages[0], True
            if page is None:
                page = context.new_page()
            try:
                page.goto(
                    search_url,
                    wait_until="domcontentloaded",
                    timeout=30000,
                )
            except Exception as e:
                _log_search(query, vertical, ok=False, error="navigation_failed")
                return {
                    "ok": False,
                    "error": "navigation_failed",
                    "detail": str(e),
                }

            # Settle: search results lazy-render after DOMContentLoaded.
            # Selectors cover the new SDUI layout (post 2026-04 rollout) AND
            # the legacy class layout, in that order.
            try:
                page.wait_for_selector(
                    "[data-sdui-screen*='SearchResultsContent'], "
                    "div.search-results-container, "
                    "main[aria-label*='Search'], "
                    "div.feed-shared-update-v2",
                    timeout=10000,
                )
            except Exception:
                pass  # extractor will return [] if nothing rendered

            # Random 2-4s human-pacing delay before reading the DOM. The new
            # SDUI layout streams cards in after the screen container exists;
            # 1-3s sometimes returned 6/8 cards. 2-4s reliably gets 8/8.
            page.wait_for_timeout(random.randint(2000, 4000))

            cur_url = page.url
            if _is_login_or_checkpoint(cur_url):
                _log_search(query, vertical, ok=False, error="session_invalid")
                return {
                    "ok": False,
                    "error": "session_invalid",
                    "url": cur_url,
                }
            # LinkedIn's anti-automation likes to redirect a refused SERP to
            # https://www.linkedin.com/ (no /login marker). Without this
            # check the extractor would run on the homepage, find nothing,
            # and we'd return ok:true with result_count:0 — masking failure
            # as an empty query. Require landing on the SERP path.
            if not cur_url.startswith(serp_prefix):
                _log_search(query, vertical, ok=False, error="serp_redirected")
                return {
                    "ok": False,
                    "error": "serp_redirected",
                    "url": cur_url,
                }

            raw = page.evaluate(_SEARCH_JS_BY_VERTICAL[vertical])
            try:
                parsed = json.loads(raw or "[]")
            except (TypeError, ValueError):
                # ValueError covers json.JSONDecodeError; TypeError covers a
                # payload that is not a string at all.
                parsed = []
            # The extractors emit a JSON array of objects; anything else is
            # treated as "no results" rather than crashing the filters below.
            results = (
                [r for r in parsed if isinstance(r, dict)]
                if isinstance(parsed, list)
                else []
            )

            # Author exclusion (ALL verticals). Drop hard-excluded authors
            # (config.json exclusions + author_blocklist, slug-keyed) before the
            # Phase A picker, scoring, or the comment_gated logic can see them.
            # Slug is the reliable key; display-name matches are intentionally
            # soft (many real namesakes), so only "hard" verdicts drop here.
            # The helper is fail-open, so a blocklist-API hiccup can't wedge
            # discovery.
            _excl = load_exclusions()
            before_excl = len(results)
            results = [
                r for r in results
                if classify_author(
                    r.get("author_name"), r.get("author_profile_url"), _excl
                )[0] != "hard"
            ]
            dropped_excluded = before_excl - len(results)
            if dropped_excluded:
                print(
                    f"[discover_linkedin_candidates] dropped_excluded="
                    f"{dropped_excluded} (author on exclusion list)",
                    file=sys.stderr,
                )

            dropped_comment_gated = 0
            if vertical == "content":
                # Programmatic comment-gate pre-filter (Phase A). Posts whose
                # author restricted commenting to connections-only are
                # uncommentable by a 3rd+ degree account: the comment editor
                # never renders and a full compose/post cycle ends in
                # rejected_by_platform. Before this filter ~28% of posts that
                # reached the comment stage died this way (35/123 since the
                # 2026-05-29 harness migration). The gate is visible in the
                # scraped card chrome, so we drop these here and never spend a
                # Phase B cycle on them. The like-at-comment-time backstop in
                # the posting agent still catches gates not shown in the card.
                before = len(results)
                results = [r for r in results if not r.get("comment_gated")]
                dropped_comment_gated = before - len(results)
                for r in results:
                    velocity, virality, age_clamped = calculate_velocity_score(r)
                    r["engagement_velocity"] = velocity
                    r["velocity_score"] = virality
                    r["age_hours_clamped"] = age_clamped
                # Twitter model: rank, never drop. Sort by velocity_score DESC
                # so the Phase A picker sees the strongest candidates first and
                # takes from the top; weak cards stay eligible as fallback so
                # quiet topics still yield a comment instead of zero-posting.
                results.sort(key=lambda x: x.get("velocity_score") or 0, reverse=True)

            _log_search(query, vertical, ok=True, error=None)
            return {
                "ok": True,
                "url": cur_url,
                "vertical": vertical,
                "query": query,
                "result_count": len(results),
                "dropped_below_virality_floor": 0,
                "dropped_comment_gated": dropped_comment_gated,
                "dropped_excluded": dropped_excluded,
                "virality_floor": None,
                "results": results,
                "rate_budget": {
                    "daily_used": rate.get("daily_used"),
                    "daily_cap": None,
                    "monthly_used": rate.get("monthly_used"),
                    "monthly_cap": None,
                },
            }

        finally:
            # Close ONLY a page WE created, never the context or the browser.
            # The Chrome's owner keeps its instance and its existing pages.
            # A reused tab is left open so the next consumer can reuse it.
            try:
                if page is not None and not _reused_page:
                    page.close()
            except Exception:
                pass
            _disconnect_quietly(browser)
908
+
909
+
910
def search_with_retry(vertical: str, query: str, max_attempts: int = 2) -> dict:
    """Run search(), retrying only when the failure looks transient.

    A failure counts as transient when its error code is navigation_failed
    or its detail text mentions a closed browser target. Everything else —
    session_invalid / mcp_not_running / serp_redirected included — is
    returned immediately without a retry.

    Args:
        vertical: Passed through to search().
        query: Passed through to search().
        max_attempts: Upper bound on search() calls (default 2, i.e. one
            retry). With a value below 1 search() is never called.

    Returns:
        The dict from the last search() call; an exception raised by
        search() is converted to an "exception" error dict. When more than
        one attempt was made the dict also carries "retry_attempt". If no
        attempt ran at all, {"ok": False, "error": "no_attempts"}.
    """
    closed_target_markers = (
        "targetclosed",
        "target page",
        "browser has been closed",
    )
    outcome: dict = {"ok": False, "error": "no_attempts"}
    attempt = 0
    while attempt < max_attempts:
        attempt += 1
        try:
            outcome = search(vertical, query)
        except Exception as exc:
            outcome = {
                "ok": False,
                "error": "exception",
                "detail": f"{type(exc).__name__}: {exc}",
                "attempt": attempt,
            }

        error_code = (outcome.get("error") or "").lower()
        detail_text = (outcome.get("detail") or "").lower()
        looks_transient = error_code == "navigation_failed" or any(
            marker in detail_text for marker in closed_target_markers
        )
        worth_retrying = (
            looks_transient
            and not outcome.get("ok")
            and attempt < max_attempts
        )
        if not worth_retrying:
            if attempt > 1:
                outcome["retry_attempt"] = attempt
            return outcome

        print(
            f"[discover_linkedin_candidates] transient failure attempt "
            f"{attempt}: {outcome.get('detail') or outcome.get('error')}; "
            f"retrying...",
            file=sys.stderr,
        )
        # Brief pause so the browser target can recover before the retry.
        time.sleep(2)
    return outcome
945
+
946
+
947
def main():
    """CLI entry point: ``<people|content|companies> <query...>``.

    Prints the search result as indented JSON on stdout and exits 0 when the
    result is ok, 1 otherwise. Exits 2 (message on stderr) when the caller
    is not authorized or too few arguments were given.
    """
    # Guard: only authorized pipelines may invoke this helper. Other Claude
    # subprocess planners auto-load CLAUDE.md as system context, see this
    # helper documented there, and have wandered off-task to "smoke test"
    # it — racing the linkedin profile's SingletonLock and triggering
    # server-side session invalidation. The legitimate caller sets the
    # matching env var immediately before invoking; nothing else does.
    authorized = os.environ.get("SOCIAL_AUTOPOSTER_LINKEDIN_SEARCH") == "1"
    if not authorized:
        refusal = {
            "ok": False,
            "error": "unauthorized_caller",
            "detail": (
                "discover_linkedin_candidates.py is invoked only by the "
                "run-linkedin Phase A discovery pipeline. Set "
                "SOCIAL_AUTOPOSTER_LINKEDIN_SEARCH=1 from the caller if "
                "this invocation is legitimate."
            ),
        }
        print(json.dumps(refusal), file=sys.stderr)
        sys.exit(2)

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        usage = (
            "Usage: discover_linkedin_candidates.py "
            "<people|content|companies> <query>"
        )
        print(usage, file=sys.stderr)
        sys.exit(2)

    # Everything after the vertical is joined back into one query string.
    vertical, *query_terms = cli_args
    outcome = search_with_retry(vertical, " ".join(query_terms))
    print(json.dumps(outcome, indent=2))
    exit_code = 0 if outcome.get("ok") else 1
    sys.exit(exit_code)


if __name__ == "__main__":
    main()