@m13v/s4l 1.6.197-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +143 -0
- package/SKILL.md +342 -0
- package/bin/cli.js +980 -0
- package/bin/cookie-helper.js +315 -0
- package/bin/platform.js +59 -0
- package/bin/scheduler/index.js +12 -0
- package/bin/scheduler/launchd.js +518 -0
- package/browser-agent-configs/all-agents-mcp.json +68 -0
- package/browser-agent-configs/linkedin-agent-mcp.json +16 -0
- package/browser-agent-configs/linkedin-agent.json +17 -0
- package/browser-agent-configs/linkedin-harness-mcp.json +21 -0
- package/browser-agent-configs/reddit-agent-mcp.json +16 -0
- package/browser-agent-configs/reddit-agent.json +17 -0
- package/browser-agent-configs/twitter-harness-mcp.json +18 -0
- package/config.example.json +45 -0
- package/mcp/dist/index.js +4212 -0
- package/mcp/dist/onboarding.js +200 -0
- package/mcp/dist/panel.html +176 -0
- package/mcp/dist/product-link.html +102 -0
- package/mcp/dist/repo.js +222 -0
- package/mcp/dist/runtime.js +1079 -0
- package/mcp/dist/screencast.js +323 -0
- package/mcp/dist/setup.js +545 -0
- package/mcp/dist/telemetry.js +306 -0
- package/mcp/dist/twitterAuth.js +138 -0
- package/mcp/dist/version.js +271 -0
- package/mcp/dist/version.json +4 -0
- package/mcp/install-runtime.mjs +70 -0
- package/mcp/install.mjs +169 -0
- package/mcp/manifest.json +80 -0
- package/mcp/menubar/dashboard_server.py +213 -0
- package/mcp/menubar/s4l_card.py +1336 -0
- package/mcp/menubar/s4l_log_relay.py +179 -0
- package/mcp/menubar/s4l_menubar.py +2439 -0
- package/mcp/menubar/s4l_state.py +891 -0
- package/mcp/package.json +34 -0
- package/mcp/shared/doctor.cjs +437 -0
- package/mcp/shared/onboarding-ledger.cjs +324 -0
- package/mcp-servers/browser-harness/server.py +968 -0
- package/package.json +160 -0
- package/requirements.txt +20 -0
- package/scripts/_compute_allowlist.py +58 -0
- package/scripts/_db_update.py +20 -0
- package/scripts/_filt.py +9 -0
- package/scripts/_li_notif_match.py +76 -0
- package/scripts/_li_notif_orchestrate.py +126 -0
- package/scripts/_lock_preempt_test.py +60 -0
- package/scripts/_run_icp_precheck.py +57 -0
- package/scripts/a16z_pearx_calendar_reminders.py +99 -0
- package/scripts/account_resolver.py +141 -0
- package/scripts/active_campaigns.py +114 -0
- package/scripts/active_users.py +190 -0
- package/scripts/amplitude_24h_signups.py +468 -0
- package/scripts/amplitude_signups.py +177 -0
- package/scripts/apply_onboarding_selections.py +131 -0
- package/scripts/audience_pages.py +243 -0
- package/scripts/audit_helper.py +120 -0
- package/scripts/author_history_block.py +353 -0
- package/scripts/autopilot_stall_watch.py +284 -0
- package/scripts/backfill_twitter_attempts_topic.py +81 -0
- package/scripts/backfill_twitter_log_post_no_id.py +322 -0
- package/scripts/bench_dashboard.sh +138 -0
- package/scripts/bh_send.py +39 -0
- package/scripts/build_persona.py +409 -0
- package/scripts/bulk_icp.py +18 -0
- package/scripts/campaign_bump.py +51 -0
- package/scripts/capture_thread_media.py +288 -0
- package/scripts/check_browser_lock_health.sh +81 -0
- package/scripts/check_external_pool_depth.py +253 -0
- package/scripts/check_unread_web_chats.py +28 -0
- package/scripts/claim_web_chat.py +47 -0
- package/scripts/classify_run_error.py +158 -0
- package/scripts/claude_job.py +988 -0
- package/scripts/clean_stale_singleton.sh +56 -0
- package/scripts/cleanup_harness_tabs.py +68 -0
- package/scripts/copy_browser_cookies.py +454 -0
- package/scripts/counterparty_history.py +350 -0
- package/scripts/db.py +57 -0
- package/scripts/discover_claude_profiles.py +120 -0
- package/scripts/discover_linkedin_candidates.py +984 -0
- package/scripts/dm_conversation.py +682 -0
- package/scripts/dm_db_update.py +69 -0
- package/scripts/dm_engage_helper.py +161 -0
- package/scripts/dm_outreach_helper.py +147 -0
- package/scripts/dm_outreach_twitter_helper.py +129 -0
- package/scripts/dm_send_log.py +106 -0
- package/scripts/dm_short_links.py +1084 -0
- package/scripts/dump_web_chat_history.py +47 -0
- package/scripts/engage_github.py +640 -0
- package/scripts/engage_reddit.py +1235 -0
- package/scripts/engage_twitter_helper.py +301 -0
- package/scripts/engagement_styles.py +1787 -0
- package/scripts/enrich_twitter_candidates.py +82 -0
- package/scripts/feedback_digest.py +448 -0
- package/scripts/fetch_prospect_profile.py +312 -0
- package/scripts/fetch_twitter_t1.py +134 -0
- package/scripts/find_threads.py +530 -0
- package/scripts/follow_gate_log.py +59 -0
- package/scripts/funnel_per_day.py +194 -0
- package/scripts/generate_daily_human_style.py +494 -0
- package/scripts/generation_trace.py +173 -0
- package/scripts/get_run_cost.py +107 -0
- package/scripts/github_engage_helper.py +93 -0
- package/scripts/github_tools.py +509 -0
- package/scripts/harness_overlay.py +556 -0
- package/scripts/harvest_twitter_following.py +243 -0
- package/scripts/heartbeat.sh +70 -0
- package/scripts/history_context.py +284 -0
- package/scripts/http_api.py +206 -0
- package/scripts/human_dm_replies_helper.py +169 -0
- package/scripts/identity.py +302 -0
- package/scripts/ig_batch_creator.sh +93 -0
- package/scripts/ig_post_type_picker.py +243 -0
- package/scripts/ig_scrape_transcribe.sh +91 -0
- package/scripts/ingest_human_dm_replies.py +271 -0
- package/scripts/ingest_web_chat_replies.py +229 -0
- package/scripts/install_fleet.py +187 -0
- package/scripts/invent_mcp_server.py +350 -0
- package/scripts/invent_topics.py +1462 -0
- package/scripts/learned_preferences.py +263 -0
- package/scripts/li_discovery.py +161 -0
- package/scripts/link_edit_helper.py +142 -0
- package/scripts/link_tail.py +592 -0
- package/scripts/linkedin_api.py +561 -0
- package/scripts/linkedin_browser.py +730 -0
- package/scripts/linkedin_cooldown.py +128 -0
- package/scripts/linkedin_exclusions.py +234 -0
- package/scripts/linkedin_killswitch.py +1333 -0
- package/scripts/linkedin_search_topic_schema.py +49 -0
- package/scripts/linkedin_unipile.py +658 -0
- package/scripts/linkedin_url.py +228 -0
- package/scripts/log_claude_session.py +636 -0
- package/scripts/log_draft.py +143 -0
- package/scripts/log_linkedin_search_attempts.py +126 -0
- package/scripts/log_post.py +651 -0
- package/scripts/log_run.py +364 -0
- package/scripts/log_thread_media.py +108 -0
- package/scripts/log_twitter_search_attempts.py +150 -0
- package/scripts/log_twitter_skips.py +211 -0
- package/scripts/lookup_post.py +78 -0
- package/scripts/mark_web_chat_processed.py +32 -0
- package/scripts/mcp_lock_proxy.py +370 -0
- package/scripts/memory_snapshot.py +972 -0
- package/scripts/merge_review_queue.py +215 -0
- package/scripts/mint_external_pool.py +182 -0
- package/scripts/mint_kent_pool.py +249 -0
- package/scripts/moltbook_post.py +320 -0
- package/scripts/moltbook_tools.py +159 -0
- package/scripts/pending_threads.py +188 -0
- package/scripts/pick_ig_account.py +177 -0
- package/scripts/pick_project.py +208 -0
- package/scripts/pick_search_topic.py +771 -0
- package/scripts/pick_thread_target.py +279 -0
- package/scripts/pick_twitter_thread_target.py +202 -0
- package/scripts/podlog_fetch_batch.sh +32 -0
- package/scripts/post_github.py +1311 -0
- package/scripts/post_reddit.py +2668 -0
- package/scripts/precompute_dashboard_stats.py +204 -0
- package/scripts/preflight.sh +297 -0
- package/scripts/progress.py +88 -0
- package/scripts/project_excludes.py +353 -0
- package/scripts/project_slugs.py +91 -0
- package/scripts/project_stats.py +241 -0
- package/scripts/project_stats_json.py +1563 -0
- package/scripts/project_topics.py +192 -0
- package/scripts/qualified_query_bank.py +436 -0
- package/scripts/reap_stale_claude_sessions.py +867 -0
- package/scripts/reddit_browser.py +2549 -0
- package/scripts/reddit_browser_fetch.py +141 -0
- package/scripts/reddit_browser_lock.py +593 -0
- package/scripts/reddit_chat_sync.py +710 -0
- package/scripts/reddit_query_bank.py +200 -0
- package/scripts/reddit_threads_helper.py +151 -0
- package/scripts/reddit_tools.py +956 -0
- package/scripts/refresh_instagram_tokens.py +280 -0
- package/scripts/release-mcpb.sh +513 -0
- package/scripts/reply_db.py +334 -0
- package/scripts/reply_insert.py +98 -0
- package/scripts/reply_risk_digest.py +761 -0
- package/scripts/reset-test-machine.sh +602 -0
- package/scripts/restore_twitter_session.py +177 -0
- package/scripts/ripen_reddit_plan.py +478 -0
- package/scripts/run_claude.sh +433 -0
- package/scripts/run_moltbook_cycle.py +555 -0
- package/scripts/s4l_box_update.sh +226 -0
- package/scripts/s4l_channel.py +103 -0
- package/scripts/s4l_ctl.sh +75 -0
- package/scripts/s4l_env.py +47 -0
- package/scripts/saps_activity.py +126 -0
- package/scripts/saps_mode.py +328 -0
- package/scripts/scan_dm_candidates.py +580 -0
- package/scripts/scan_github_replies.py +168 -0
- package/scripts/scan_instagram_comments.py +481 -0
- package/scripts/scan_moltbook_replies.py +252 -0
- package/scripts/scan_pii.py +190 -0
- package/scripts/scan_reddit_replies.py +377 -0
- package/scripts/scan_twitter_mentions_browser.py +327 -0
- package/scripts/scan_twitter_thread_followups.py +299 -0
- package/scripts/scan_x_profile.py +384 -0
- package/scripts/schedule_state.py +202 -0
- package/scripts/scheduled_tasks_snapshot.py +123 -0
- package/scripts/score_linkedin_candidates.py +419 -0
- package/scripts/score_twitter_candidates.py +718 -0
- package/scripts/scrape_linkedin_comment_stats.py +1755 -0
- package/scripts/scrape_linkedin_stats_browser.py +52 -0
- package/scripts/scrape_reddit_views.py +365 -0
- package/scripts/seed_search_queries.py +453 -0
- package/scripts/seed_search_topics.py +127 -0
- package/scripts/send_web_chat_reply.py +130 -0
- package/scripts/sentry_init.py +128 -0
- package/scripts/setup_twitter_auth.py +1320 -0
- package/scripts/snapshot.py +583 -0
- package/scripts/stats.py +2702 -0
- package/scripts/stats_helper.py +52 -0
- package/scripts/strike_alert.py +783 -0
- package/scripts/sweep_post_link_clicks.py +107 -0
- package/scripts/sync_ig_to_posts.py +147 -0
- package/scripts/test_browser_lock.py +189 -0
- package/scripts/test_installation_api.sh +52 -0
- package/scripts/test_percard_posting.py +142 -0
- package/scripts/top_dud_linkedin_queries.py +71 -0
- package/scripts/top_dud_reddit_queries.py +67 -0
- package/scripts/top_dud_twitter_queries.py +71 -0
- package/scripts/top_dud_twitter_topics.py +102 -0
- package/scripts/top_linkedin_queries.py +55 -0
- package/scripts/top_omitted_reddit_topics.py +91 -0
- package/scripts/top_performers.py +588 -0
- package/scripts/top_search_topics.py +180 -0
- package/scripts/top_twitter_queries.py +190 -0
- package/scripts/twitter_access_check.py +382 -0
- package/scripts/twitter_account.py +41 -0
- package/scripts/twitter_batch_phase.py +126 -0
- package/scripts/twitter_browser.py +2804 -0
- package/scripts/twitter_cookie_mirror.py +130 -0
- package/scripts/twitter_cycle_helper.py +310 -0
- package/scripts/twitter_gen_links.py +287 -0
- package/scripts/twitter_post_plan.py +1188 -0
- package/scripts/twitter_scan.py +324 -0
- package/scripts/twitter_supply_signal.py +57 -0
- package/scripts/twitter_threads_helper.py +152 -0
- package/scripts/unclaim_web_chat.py +29 -0
- package/scripts/update_instagram_stats.py +261 -0
- package/scripts/update_linkedin_stats_from_feed.py +328 -0
- package/scripts/version.py +72 -0
- package/scripts/watchdog_hung_runs.py +343 -0
- package/scripts/write_generation_trace.py +73 -0
- package/setup/SKILL.md +277 -0
- package/skill/amplitude-24h-signups.sh +38 -0
- package/skill/archive-old-logs.sh +40 -0
- package/skill/audit-dm-staleness.sh +42 -0
- package/skill/audit-linkedin.sh +14 -0
- package/skill/audit-moltbook.sh +4 -0
- package/skill/audit-reddit-resurrect.sh +67 -0
- package/skill/audit-reddit.sh +4 -0
- package/skill/audit-twitter.sh +4 -0
- package/skill/audit.sh +287 -0
- package/skill/backfill-twitter-attempts-topic.sh +19 -0
- package/skill/backfill-twitter-ghost-posts.sh +24 -0
- package/skill/check-external-pool-depth.sh +7 -0
- package/skill/check-web-chats.sh +203 -0
- package/skill/dm-outreach-linkedin.sh +250 -0
- package/skill/dm-outreach-reddit.sh +274 -0
- package/skill/dm-outreach-twitter.sh +265 -0
- package/skill/engage-dm-replies-linkedin.sh +4 -0
- package/skill/engage-dm-replies-reddit.sh +4 -0
- package/skill/engage-dm-replies-twitter.sh +4 -0
- package/skill/engage-dm-replies.sh +1597 -0
- package/skill/engage-linkedin.sh +581 -0
- package/skill/engage-moltbook.sh +36 -0
- package/skill/engage-reddit.sh +146 -0
- package/skill/engage-twitter.sh +467 -0
- package/skill/github-engage.sh +176 -0
- package/skill/ingest-web-chat-replies.sh +38 -0
- package/skill/invent-supply-test.sh +100 -0
- package/skill/invent-topics.sh +50 -0
- package/skill/lib/linkedin-backend.sh +364 -0
- package/skill/lib/platform.sh +48 -0
- package/skill/lib/reddit-backend.sh +234 -0
- package/skill/lib/twitter-backend.sh +314 -0
- package/skill/link-edit-github.sh +136 -0
- package/skill/link-edit-moltbook.sh +117 -0
- package/skill/link-edit-reddit.sh +201 -0
- package/skill/linkedin-presence.sh +182 -0
- package/skill/linkedin-recovery.sh +282 -0
- package/skill/lock.sh +647 -0
- package/skill/memory-snapshot.sh +39 -0
- package/skill/precompute-stats.sh +35 -0
- package/skill/prewarm-funnel.sh +104 -0
- package/skill/refresh-instagram-tokens.sh +57 -0
- package/skill/refresh-twitter-following.sh +52 -0
- package/skill/reply-risk-digest.sh +31 -0
- package/skill/run-cycle-update-guard.sh +44 -0
- package/skill/run-draft-and-publish.sh +123 -0
- package/skill/run-generate-daily-style.sh +50 -0
- package/skill/run-github-launchd.sh +62 -0
- package/skill/run-github.sh +102 -0
- package/skill/run-instagram-daily.sh +149 -0
- package/skill/run-instagram-render.sh +875 -0
- package/skill/run-linkedin-launchd.sh +81 -0
- package/skill/run-linkedin-unipile.sh +130 -0
- package/skill/run-linkedin.sh +1593 -0
- package/skill/run-moltbook-launchd.sh +61 -0
- package/skill/run-moltbook.sh +38 -0
- package/skill/run-overlay-watch.sh +100 -0
- package/skill/run-reddit-search-launchd.sh +64 -0
- package/skill/run-reddit-search.sh +505 -0
- package/skill/run-reddit-threads-double.sh +32 -0
- package/skill/run-reddit-threads.sh +847 -0
- package/skill/run-scan-moltbook-replies.sh +57 -0
- package/skill/run-twitter-cycle-launchd.sh +63 -0
- package/skill/run-twitter-cycle-singleton.sh +62 -0
- package/skill/run-twitter-cycle.sh +2408 -0
- package/skill/run-twitter-threads.sh +592 -0
- package/skill/scan-instagram-replies.sh +61 -0
- package/skill/scan-twitter-followups.sh +57 -0
- package/skill/social-autoposter-update.sh +66 -0
- package/skill/stats-instagram.sh +72 -0
- package/skill/stats-linkedin.sh +271 -0
- package/skill/stats-moltbook.sh +4 -0
- package/skill/stats-reddit.sh +4 -0
- package/skill/stats-twitter.sh +4 -0
- package/skill/stats.sh +521 -0
- package/skill/strike-alert.sh +18 -0
- package/skill/styles.sh +87 -0
- package/skill/sweep-link-clicks.sh +40 -0
- package/skill/topics.sh +51 -0
|
@@ -0,0 +1,984 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""LinkedIn SERP discovery: read-only Phase A search-page scrape.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python3 discover_linkedin_candidates.py <vertical> <query>
|
|
6
|
+
# vertical = people | content | companies
|
|
7
|
+
|
|
8
|
+
Attaches to the already-running LinkedIn Chrome via CDP and reuses its
|
|
9
|
+
existing BrowserContext. Same Chrome process, same cookies, same UA, same
|
|
10
|
+
fingerprint as whatever LinkedIn already trusts from the live session.
|
|
11
|
+
Opens our own page in that context, navigates to the SERP, runs ONE
|
|
12
|
+
page.evaluate() against the rendered DOM, closes our page, disconnects.
|
|
13
|
+
|
|
14
|
+
CDP endpoint resolution (see _resolve_cdp_url, 2026-05-29):
|
|
15
|
+
Lane 1 (preferred): LINKEDIN_CDP_URL — the browser-harness Chrome on
|
|
16
|
+
:9556, exported by skill/lib/linkedin-backend.sh. The main
|
|
17
|
+
run-linkedin.sh pipeline now drives that harness Chrome
|
|
18
|
+
(mcp__linkedin-harness__bh_run) instead of the retired
|
|
19
|
+
linkedin-agent MCP, so this is the live, logged-in session.
|
|
20
|
+
Lane 2 (legacy): http://localhost:<port> read from the linkedin-agent
|
|
21
|
+
MCP's DevToolsActivePort. Kept for running outside the harness.
|
|
22
|
+
|
|
23
|
+
Read-only DOM scrape: NO Voyager API, NO scroll-and-expand loops, NO
|
|
24
|
+
permalink fan-out, NO clicks/typing, NO programmatic login.
|
|
25
|
+
|
|
26
|
+
Pre-conditions for this to work:
|
|
27
|
+
1. A LinkedIn Chrome is running and reachable via one of the two CDP
|
|
28
|
+
lanes above (normally the harness Chrome on :9556 launched by
|
|
29
|
+
ensure_linkedin_browser_for_backend).
|
|
30
|
+
2. The user is logged in inside that browser. We do NOT log in.
|
|
31
|
+
|
|
32
|
+
Why CDP attach rather than launch_persistent_context: the previous version
|
|
33
|
+
launched its own Chrome against the shared profile dir. When LinkedIn
|
|
34
|
+
redirected the SERP request (UA mismatch / fresh-launch fingerprint) the
|
|
35
|
+
homepage response contained Set-Cookie headers that cleared li_at. On
|
|
36
|
+
context.close() Chrome flushed the cleared cookies to disk, logging the
|
|
37
|
+
shared profile out and breaking unread-dms + the linkedin-agent MCP.
|
|
38
|
+
Attaching to the MCP's running Chrome eliminates the launch fingerprint,
|
|
39
|
+
removes the cookie-flush risk (we never close the context), and keeps the
|
|
40
|
+
profile fully owned by one process at a time.
|
|
41
|
+
|
|
42
|
+
Per CLAUDE.md "LinkedIn: flagged patterns" carve-out (2026-04-29): the
|
|
43
|
+
read-only DOM read is permitted because the request runs inside the same
|
|
44
|
+
Chrome the MCP already drives. The 2026-04-17 restriction was caused by
|
|
45
|
+
Voyager calls + permalink scroll loops, neither of which appear here.
|
|
46
|
+
|
|
47
|
+
Formerly rate-limited against linkedin_browser_searches (2026-04-29 research:
|
|
48
|
+
~30s min gap, ~40/day, ~150/month soft cap, under LinkedIn's ~300/month
|
|
49
|
+
commercial-use wall on free accounts). Those caps were removed 2026-05-01:
|
|
50
|
+
searches are still logged there, but the script never refuses or fails closed.
|
|
51
|
+
|
|
52
|
+
Output (stdout, JSON):
|
|
53
|
+
{
|
|
54
|
+
"ok": true,
|
|
55
|
+
"url": "https://www.linkedin.com/search/results/people/?keywords=...",
|
|
56
|
+
"vertical": "people",
|
|
57
|
+
"query": "founder rag retrieval",
|
|
58
|
+
"result_count": 10,
|
|
59
|
+
"results": [...],
|
|
60
|
+
"rate_budget": {"daily_used": N, "daily_cap": null,
|
|
61
|
+
"monthly_used": N, "monthly_cap": null},
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
Failure shapes:
|
|
65
|
+
{"ok": false, "error": "session_invalid", "url": "..."}
|
|
66
|
+
{"ok": false, "error": "serp_redirected", "url": "..."}
|
|
67
|
+
{"ok": false, "error": "mcp_not_running", "detail": "..."}
|
|
68
|
+
{"ok": false, "error": "cdp_attach_failed", "detail": "..."}
|
|
69
|
+
{"ok": false, "error": "navigation_failed", "detail": "..."}
|
|
70
|
+
{"ok": false, "error": "bad_vertical", "detail": "..."}
|
|
71
|
+
{"ok": false, "error": "empty_query", "detail": ""}
|
|
72
|
+
|
|
73
|
+
Note: rate_limited and db_unavailable are no longer raised. All caps were
|
|
74
|
+
removed 2026-05-01; the script logs to linkedin_browser_searches for
|
|
75
|
+
visibility but never refuses based on volume or recency.
|
|
76
|
+
|
|
77
|
+
Exits 0 on success, 1 on failure.
|
|
78
|
+
"""
|
|
79
|
+
|
|
80
|
+
import json
|
|
81
|
+
import os
|
|
82
|
+
import random
|
|
83
|
+
import sys
|
|
84
|
+
import time
|
|
85
|
+
import urllib.parse
|
|
86
|
+
from typing import Optional
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _bh_activity_log(action: str, cdp_url: str) -> None:
|
|
90
|
+
"""Append to the universal browser-activity.log (Python-CDP path coverage)."""
|
|
91
|
+
try:
|
|
92
|
+
import time as _t
|
|
93
|
+
import os as _o
|
|
94
|
+
from pathlib import Path as _P
|
|
95
|
+
_p = _P(_o.environ.get(
|
|
96
|
+
"BH_ACTIVITY_LOG",
|
|
97
|
+
str(_P.home() / ".claude" / "browser-profiles" / "browser-activity.log"),
|
|
98
|
+
))
|
|
99
|
+
_port = (cdp_url or "").rsplit(":", 1)[-1].split("/")[0] or "-"
|
|
100
|
+
_p.parent.mkdir(parents=True, exist_ok=True)
|
|
101
|
+
with _p.open("a") as _f:
|
|
102
|
+
_f.write(
|
|
103
|
+
f"[{_t.strftime('%Y-%m-%d %H:%M:%S')}] pycdp "
|
|
104
|
+
f"script={_o.path.basename(__file__)} action={action} "
|
|
105
|
+
f"pid={_o.getpid()} ppid={_o.getppid()} cdp={cdp_url or '-'} "
|
|
106
|
+
f"port={_port}\n"
|
|
107
|
+
)
|
|
108
|
+
except Exception:
|
|
109
|
+
pass
|
|
110
|
+
|
|
111
|
+
# Reuse the lock helper + login-URL detector from linkedin_browser. We share
|
|
112
|
+
# the lock so concurrent Python helpers (search vs unread-dms) serialize on
|
|
113
|
+
# the same ~/.claude/linkedin-agent-lock.json. PROFILE_DIR also points at
|
|
114
|
+
# the directory where the linkedin-agent MCP writes DevToolsActivePort.
|
|
115
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
116
|
+
from linkedin_browser import ( # noqa: E402
|
|
117
|
+
PROFILE_DIR,
|
|
118
|
+
_acquire_browser_lock,
|
|
119
|
+
_is_login_or_checkpoint,
|
|
120
|
+
)
|
|
121
|
+
from score_linkedin_candidates import calculate_velocity_score # noqa: E402
|
|
122
|
+
try:  # author exclusion is fail-open: never let it break discovery
    from linkedin_exclusions import load_exclusions, classify_author  # noqa: E402
except Exception:  # pragma: no cover - helper missing -> exclusion becomes a no-op
    def load_exclusions(platform="linkedin"):
        """Stand-in used when linkedin_exclusions cannot be imported.

        Ignores ``platform`` and returns three empty, independent sets so
        that nothing is ever excluded.
        """
        return {
            bucket: set()
            for bucket in ("hard_slugs", "soft_slugs", "soft_names")
        }

    def classify_author(author_name, author_profile_url, excl=None):
        """Stand-in classifier: always returns ``(None, "")`` regardless of input."""
        return (None, "")
|
|
130
|
+
from http_api import api_get, api_post # noqa: E402
|
|
131
|
+
|
|
132
|
+
DEVTOOLS_ACTIVE_PORT = os.path.join(PROFILE_DIR, "DevToolsActivePort")
|
|
133
|
+
|
|
134
|
+
# Virality (velocity * reach_mult * age_decay * (1 + disc_bonus); see
|
|
135
|
+
# score_linkedin_candidates.calculate_velocity_score) is a RANKING signal,
|
|
136
|
+
# not a cutoff. Aligned with the Twitter model (2026-05-29): score every
|
|
137
|
+
# SERP card, sort by velocity_score DESC so the Phase A picker sees the
|
|
138
|
+
# strongest candidates first, and NEVER drop a card on virality. The picker
|
|
139
|
+
# prompt already steers toward the top of the sorted list and leans toward
|
|
140
|
+
# posting, so the old hard floor only caused zero-post cycles on quiet
|
|
141
|
+
# topics where every card scored low (e.g. niche backend-dev SERPs). Twitter
|
|
142
|
+
# (score_twitter_candidates.py) keeps every candidate the same way: "no cap,
|
|
143
|
+
# no cutoff: this only ever raises a score, never removes a candidate."
|
|
144
|
+
|
|
145
|
+
# Search rate-limit budget removed 2026-05-01 per user instruction. The
|
|
146
|
+
# linkedin_browser_searches table is kept so daily/monthly volumes remain
|
|
147
|
+
# observable, but no min-gap, daily, or monthly cap is enforced. Caller is
|
|
148
|
+
# responsible for cadence. The 2026-04-17 LinkedIn restriction (see CLAUDE.md
|
|
149
|
+
# "LinkedIn: flagged patterns") came from behavioral fingerprinting, not raw
|
|
150
|
+
# volume, so volume caps weren't the load-bearing protection anyway — but
|
|
151
|
+
# back-to-back machine-cadence search hits are now structurally possible
|
|
152
|
+
# from this script.
|
|
153
|
+
SEARCH_VERTICALS = ("people", "content", "companies")
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _check_rate_limit() -> dict:
|
|
157
|
+
"""Always returns ok=True. Caps removed 2026-05-01 per user instruction.
|
|
158
|
+
|
|
159
|
+
Reads current daily/monthly volume so the response shape keeps the
|
|
160
|
+
rate_budget block populated for the dashboard. A failure here is
|
|
161
|
+
non-fatal — the search proceeds anyway since there's no cap to enforce.
|
|
162
|
+
|
|
163
|
+
Migrated 2026-06-01 from a direct DB read (+ first-use CREATE TABLE) to
|
|
164
|
+
GET /api/v1/linkedin-browser-searches. The table now lives in the
|
|
165
|
+
social-autoposter-website schema; no client-side DDL needed.
|
|
166
|
+
"""
|
|
167
|
+
daily = monthly = 0
|
|
168
|
+
try:
|
|
169
|
+
resp = api_get("/api/v1/linkedin-browser-searches")
|
|
170
|
+
data = resp.get("data") or {}
|
|
171
|
+
daily = int(data.get("daily_used") or 0)
|
|
172
|
+
monthly = int(data.get("monthly_used") or 0)
|
|
173
|
+
except (Exception, SystemExit):
|
|
174
|
+
# API down? Not our problem — caps are off, search proceeds.
|
|
175
|
+
pass
|
|
176
|
+
return {"ok": True, "daily_used": daily, "monthly_used": monthly}
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _log_search(query: str, vertical: str, ok: bool, error: Optional[str]) -> None:
|
|
180
|
+
"""Best-effort write of one row to linkedin_browser_searches.
|
|
181
|
+
|
|
182
|
+
Never raises: a failed log must not turn a successful search into a
|
|
183
|
+
failure. Note that if logging fails the next rate-limit check will
|
|
184
|
+
under-count. Acceptable: the monthly cap has 50% headroom under the
|
|
185
|
+
actual wall.
|
|
186
|
+
|
|
187
|
+
Migrated 2026-06-01 to POST /api/v1/linkedin-browser-searches. SystemExit
|
|
188
|
+
(raised by http_api on 4xx / exhausted retries) is swallowed too, so a
|
|
189
|
+
logging failure can never bubble up and fail a real search.
|
|
190
|
+
"""
|
|
191
|
+
try:
|
|
192
|
+
api_post(
|
|
193
|
+
"/api/v1/linkedin-browser-searches",
|
|
194
|
+
{"query": query, "vertical": vertical, "ok": ok, "error": error},
|
|
195
|
+
)
|
|
196
|
+
except (Exception, SystemExit) as e:
|
|
197
|
+
print(
|
|
198
|
+
f"[discover_linkedin_candidates] _log_search: post failed: {e}",
|
|
199
|
+
file=sys.stderr,
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# DOM extractors per vertical. Each is a single querySelectorAll + map, with
|
|
204
|
+
# multiple selector fallbacks because LinkedIn's class names rotate. Returns
|
|
205
|
+
# JSON.stringify(...) so the Python side can json.loads regardless of how the
|
|
206
|
+
# evaluate channel marshals nested objects. Limit to the first 25 cards on the
|
|
207
|
+
# page — anything beyond that requires scrolling, which we explicitly do not
|
|
208
|
+
# do.
|
|
209
|
+
#
|
|
210
|
+
# Provenance:
|
|
211
|
+
# _SEARCH_JS_CONTENT — lifted from skill/run-linkedin.sh (production-tested).
|
|
212
|
+
# _SEARCH_JS_PEOPLE — UNVERIFIED. Selectors based on widely-documented
|
|
213
|
+
# LinkedIn patterns + multiple fallbacks. Smoke-test
|
|
214
|
+
# against a real SERP before relying on the output.
|
|
215
|
+
# _SEARCH_JS_COMPANIES — UNVERIFIED. Same caveat as people.
|
|
216
|
+
#
|
|
217
|
+
# Reconciliation procedure for the UNVERIFIED extractors (do this once,
|
|
218
|
+
# then update this block to mark them VERIFIED with the date):
|
|
219
|
+
# Preconditions:
|
|
220
|
+
# - linkedin-agent has been idle >= 1 hour. Check
|
|
221
|
+
# ~/.playwright-mcp/linkedin-agent/page-*.yml mtimes.
|
|
222
|
+
# - The persistent profile is logged in. Do NOT trigger a probe like
|
|
223
|
+
# "navigate to LinkedIn and tell me what you see" — that prompt itself
|
|
224
|
+
# is the high-risk behavior that invalidated cookies on 2026-04-29.
|
|
225
|
+
# If the session is dead, wait for the next normal pipeline cycle to
|
|
226
|
+
# re-auth, then resume reconciliation in a fresh hour.
|
|
227
|
+
# Steps (use the linkedin-agent MCP, NOT this script — the script logs to
|
|
228
|
+
# linkedin_browser_searches and burns the rate budget for nothing):
|
|
229
|
+
# 1. mcp__linkedin-agent__browser_navigate to
|
|
230
|
+
# https://www.linkedin.com/search/results/people/?keywords=founder%20ai
|
|
231
|
+
# 2. mcp__linkedin-agent__browser_evaluate, paste _SEARCH_JS_PEOPLE
|
|
232
|
+
# verbatim (including the JSON.stringify wrap). JSON.parse the
|
|
233
|
+
# returned string.
|
|
234
|
+
# Accept criterion: >= 5 entries with non-empty name AND profile_url.
|
|
235
|
+
# Reject: [] or rows with all-empty fields → snapshot the page,
|
|
236
|
+
# find the live card class names, patch the querySelectorAll lists.
|
|
237
|
+
# Keep existing fallback selectors at the END of each list to stay
|
|
238
|
+
# compatible with the older layout.
|
|
239
|
+
# 3. Repeat for /search/results/companies/?keywords=founder%20ai with
|
|
240
|
+
# _SEARCH_JS_COMPANIES. Same accept criterion (>= 5 cards with
|
|
241
|
+
# company AND company_url).
|
|
242
|
+
# Hard limits during reconciliation: 2 navigations total, no
|
|
243
|
+
# close-and-reopen of the agent, no scroll, no clicks. Anything more is
|
|
244
|
+
# the same fingerprint pattern that triggered the 2026-04-29 lockouts.
|
|
245
|
+
_SEARCH_JS_PEOPLE = r"""
|
|
246
|
+
() => {
|
|
247
|
+
const out = [];
|
|
248
|
+
const cards = document.querySelectorAll(
|
|
249
|
+
"div.search-results-container li div.entity-result, "
|
|
250
|
+
+ "li.reusable-search__result-container, "
|
|
251
|
+
+ "[data-chameleon-result-urn]"
|
|
252
|
+
);
|
|
253
|
+
for (const c of Array.from(cards).slice(0, 25)) {
|
|
254
|
+
const link = c.querySelector(
|
|
255
|
+
"a[href*='/in/'].app-aware-link, a[href*='/in/']"
|
|
256
|
+
);
|
|
257
|
+
const profileUrl = link
|
|
258
|
+
? (link.href || link.getAttribute("href") || "")
|
|
259
|
+
: "";
|
|
260
|
+
const nameEl = c.querySelector(
|
|
261
|
+
".entity-result__title-text, .entity-result__title-line, "
|
|
262
|
+
+ "span[aria-hidden='true']"
|
|
263
|
+
);
|
|
264
|
+
const name = nameEl ? (nameEl.textContent || "").trim() : "";
|
|
265
|
+
const headlineEl = c.querySelector(
|
|
266
|
+
".entity-result__primary-subtitle, .t-14.t-black.t-normal"
|
|
267
|
+
);
|
|
268
|
+
const headline = headlineEl
|
|
269
|
+
? (headlineEl.textContent || "").trim() : "";
|
|
270
|
+
const locEl = c.querySelector(
|
|
271
|
+
".entity-result__secondary-subtitle, .t-14.t-normal"
|
|
272
|
+
);
|
|
273
|
+
const location = locEl ? (locEl.textContent || "").trim() : "";
|
|
274
|
+
if (!name && !profileUrl) continue;
|
|
275
|
+
out.push({
|
|
276
|
+
name: name.replace(/\s+/g, " "),
|
|
277
|
+
headline: headline.replace(/\s+/g, " "),
|
|
278
|
+
location: location.replace(/\s+/g, " "),
|
|
279
|
+
profile_url: profileUrl.split("?")[0],
|
|
280
|
+
});
|
|
281
|
+
}
|
|
282
|
+
return JSON.stringify(out);
|
|
283
|
+
}
|
|
284
|
+
"""
|
|
285
|
+
|
|
286
|
+
# Content-search extractor. Two layouts coexist in the wild:
#
# New SDUI layout (post 2026-04-30 reconciliation): obfuscated class names,
# results wrapped in [data-sdui-screen*="SearchResultsContent"], each card
# [role="listitem"][componentkey]. The activity URN is GONE from the DOM
# for most cards: only cards that embed a quoted/reposted share keep a
# visible /feed/update/<urn> link. So post_url/activity_id can legitimately
# be null on the new layout — callers must dedupe by
# (author_profile_url, post_text hash) when activity_id is missing.
#
# Legacy class layout (pre-rollout, may still appear): div.feed-shared-update-v2
# / div[data-urn=...] cards with full URNs.
#
# Tries the new layout first, falls back to legacy, returns the same shape
# either way. Verified 2026-04-30 against
# /search/results/content/?keywords=ai%20agent%20founder
# 8/8 cards extracted (author_name + author_profile_url + post_text + age_text);
# 1/8 had activity_id (the only embedded-share case).
#
# Output: a JSON-encoded array (string). At most the first 25 cards are
# inspected. A card is skipped when its activity_id was already emitted, or
# when it yields no author name, no author URL and no post text. Each kept
# card becomes one object with the keys:
#   post_url, activity_id, all_urns,
#   author_name, author_headline, author_profile_url, author_followers,
#   post_text, age_text, age_hours,
#   reactions, comments, reposts, comment_gated
# age_hours is derived from age_text by parseRelativeAge (hours; null when no
# "<n><unit>" token is found). reactions/comments/reposts default to 0.
# comment_gated is true when the card text contains the connections-only
# comment notice.
_SEARCH_JS_CONTENT = r"""
() => {
const out = [];

function parseRelativeAge(txt) {
if (!txt) return null;
const m = txt.match(/(\d+)\s*(s|min|m|hr|h|d|w|mo|y)\b/i);
if (!m) return null;
const n = parseInt(m[1], 10);
let u = m[2].toLowerCase();
if (u === 'hr') u = 'h';
if (u === 'min') u = 'm';
const map = { s: 1/3600, m: 1/60, h: 1, d: 24, w: 24*7, mo: 24*30, y: 24*365 };
return n * (map[u] || 0);
}
function parseCount(txt) {
if (!txt) return 0;
const t = String(txt).replace(/,/g, '').trim();
const m = t.match(/([\d.]+)\s*([KkMm]?)/);
if (!m) return 0;
const n = parseFloat(m[1]);
const u = (m[2] || '').toLowerCase();
return Math.round(n * (u === 'k' ? 1000 : u === 'm' ? 1_000_000 : 1));
}

// 1. New SDUI layout.
let items = [];
const screen = document.querySelector('[data-sdui-screen*="SearchResultsContent"]');
if (screen) {
items = Array.from(screen.querySelectorAll('[role="listitem"][componentkey]'));
}
// 2. Legacy fallback.
if (items.length === 0) {
items = Array.from(document.querySelectorAll(
'div.feed-shared-update-v2, '
+ 'div[data-urn*="urn:li:activity"], '
+ 'div[data-urn*="urn:li:share"], '
+ 'div[data-urn*="urn:li:ugcPost"]'
));
}

const seen = new Set();
const urnRe = /urn:li:(activity|share|ugcPost):(\d{16,19})/;
const urnReG = /urn:li:(activity|share|ugcPost):(\d{16,19})/g;

for (const item of items.slice(0, 25)) {
let urnType = null, activityId = null;
const allUrns = new Set();

const updateLink = item.querySelector('a[href*="/feed/update/"]');
if (updateLink) {
const m = (updateLink.href || '').match(urnRe);
if (m) { urnType = m[1]; activityId = m[2]; allUrns.add(m[2]); }
}
if (!activityId) {
const dataUrn = item.getAttribute('data-urn') || '';
const m = dataUrn.match(urnRe);
if (m) { urnType = m[1]; activityId = m[2]; allUrns.add(m[2]); }
}
if (!activityId) {
const html = item.outerHTML || '';
let mm;
urnReG.lastIndex = 0;
while ((mm = urnReG.exec(html)) !== null) {
allUrns.add(mm[2]);
if (!activityId) { urnType = mm[1]; activityId = mm[2]; }
}
}
if (activityId) {
if (seen.has(activityId)) continue;
seen.add(activityId);
}

const authorLink = item.querySelector('a[aria-label*="profile" i][href*="/in/"]')
|| item.querySelector('a[href*="/in/"]');
const authorUrl = authorLink ? (authorLink.href || '').split('?')[0] : null;
let authorName = null;
if (authorLink) {
const al = authorLink.getAttribute('aria-label') || '';
const m = al.match(/View\s+(.+?)['’]s\s+profile/i);
if (m) authorName = m[1].trim();
}
// The new SDUI layout puts the View-profile aria on an inner <svg>, not
// the <a>. Probe descendants of the link too before falling back.
if (!authorName && authorLink) {
const inner = authorLink.querySelector('[aria-label*="profile" i]');
if (inner) {
const m = (inner.getAttribute('aria-label') || '').match(/View\s+(.+?)['’]s\s+profile/i);
if (m) authorName = m[1].trim();
}
}
if (!authorName) {
const followBtn = item.querySelector('button[aria-label^="Follow "]');
if (followBtn) {
const m = (followBtn.getAttribute('aria-label') || '').match(/^Follow\s+(.+)$/i);
if (m) authorName = m[1].trim();
}
}
if (!authorName) {
const nameEl = item.querySelector(
'.update-components-actor__name, span.feed-shared-actor__name'
);
if (nameEl) authorName = (nameEl.textContent || '').trim();
}

let authorFollowers = null;
const supplementary = item.querySelector(
'.update-components-actor__supplementary-actor-info, '
+ '.feed-shared-actor__sub-description'
);
if (supplementary) {
const fm = (supplementary.textContent || '').match(/([\d.,]+[KkMm]?)\s*follower/);
if (fm) authorFollowers = parseCount(fm[1]);
}

// Actor block = the prefix of the listitem text before "• Follow". On the
// new SDUI layout it has the shape "Feed post<NAME> • <CONNECTION><HEADLINE><AGE>".
const fullItemText = (item.textContent || '').replace(/\s+/g, ' ').trim();
const followIdx0 = fullItemText.indexOf('• Follow');
const actorBlock = followIdx0 >= 0 ? fullItemText.slice(0, followIdx0) : fullItemText.slice(0, 300);

// Author headline: strip "Feed post" prefix, the name, the connection
// marker, and the trailing age. Best-effort; for company pages or
// non-standard layouts (no • <connection>) we still return whatever's
// left after the name.
let authorHeadline = null;
{
let h = actorBlock.replace(/^Feed post/, '').trim();
if (authorName && h.startsWith(authorName)) h = h.slice(authorName.length);
h = h.replace(/^\s*•\s*(1st|2nd|3rd\+?|Out of network|Following)\s*/i, '');
h = h.replace(/\s*(?:•\s*)?\d+\s*(?:s|min|m|hr|h|d|w|mo|y)\s*$/i, '');
h = h.trim();
if (h) authorHeadline = h;
}

// Post body. Legacy: prefer the dedicated text element. New SDUI: take
// text after "• Follow", then strip trailing CTA / count noise.
let postText = '';
const textEl = item.querySelector(
'.update-components-text, .feed-shared-update-v2__description, span.break-words'
);
if (textEl) {
postText = (textEl.textContent || '').replace(/\s+/g, ' ').trim();
} else {
let s = fullItemText.replace(/^Feed post/, '').trim();
const idx = s.indexOf('• Follow');
if (idx >= 0) s = s.slice(idx + '• Follow'.length).trim();
// Strip trailing "… more" / "...more" the new layout appends.
s = s.replace(/\s*[…\.]+\s*more\s*$/i, '').trim();
// Strip trailing count noise like "+132 comments23 reactions",
// "1 comment1", "+811 reaction", "23 reactions".
// Count widgets concatenate without delimiters; consume runs greedily.
for (let i = 0; i < 6; i++) {
const before = s;
s = s.replace(
/\s*\+?\s*\d+\s*(?:reactions?|comments?|reposts?)\s*\d*\s*$/i,
''
).trim();
if (s === before) break;
}
// Strip a stray trailing digit (artifact of glued-in count widgets).
s = s.replace(/\s+\d+\s*$/, '').trim();
postText = s;
}

let ageText = '';
const timeEl = item.querySelector(
'time, .update-components-actor__sub-description, '
+ 'span.feed-shared-actor__sub-description'
);
if (timeEl) ageText = (timeEl.textContent || '').trim();
if (!ageText) {
const ageM = fullItemText.match(/(\d+\s*(?:s|min|m|hr|h|d|w|mo|y))\b/i);
if (ageM) ageText = ageM[1];
}
const ageHours = parseRelativeAge(ageText);

// Counts. New SDUI hides counts from button aria-labels and embeds them
// as plain leaf-divs ("1 comment", "23 reactions", "+811 reaction").
// We walk every leaf div/span and match the strict shape; we keep the
// max in case the same widget is mirrored across nested wrappers.
let reactions = 0, comments = 0, reposts = 0;
item.querySelectorAll('div, span').forEach(el => {
if (el.children.length > 0) return;
const t = (el.textContent || '').trim();
if (!t || t.length > 30) return;
let m;
if ((m = t.match(/^[+]?\s*([\d.,]+\s*[KkMm]?)\s+reactions?$/i))) {
const v = parseCount(m[1]);
if (v > reactions) reactions = v;
}
if ((m = t.match(/^[+]?\s*([\d.,]+\s*[KkMm]?)\s+comments?$/i))) {
const v = parseCount(m[1]);
if (v > comments) comments = v;
}
if ((m = t.match(/^[+]?\s*([\d.,]+\s*[KkMm]?)\s+reposts?$/i))) {
const v = parseCount(m[1]);
if (v > reposts) reposts = v;
}
});
// Legacy fallbacks (unchanged): aria-label-based counts on the old layout.
if (reactions === 0) {
const reactEl = item.querySelector(
'[aria-label*=" reaction" i], '
+ '.social-details-social-counts__reactions-count'
);
if (reactEl) {
const m = (reactEl.getAttribute('aria-label') || reactEl.textContent || '')
.match(/([\d.,]+\s*[KkMm]?)\s*reaction/i);
if (m) reactions = parseCount(m[1]);
}
}
if (comments === 0) {
const commentEl = item.querySelector(
'[aria-label*=" comment" i], '
+ 'li.social-details-social-counts__comments'
);
if (commentEl) {
const m = (commentEl.getAttribute('aria-label') || commentEl.textContent || '')
.match(/([\d.,]+\s*[KkMm]?)\s*comment/i);
if (m) comments = parseCount(m[1]);
}
}
if (reposts === 0) {
const repostEl = item.querySelector(
'[aria-label*=" repost" i], '
+ 'li.social-details-social-counts__item--right-aligned'
);
if (repostEl) {
const m = (repostEl.getAttribute('aria-label') || repostEl.textContent || '')
.match(/([\d.,]+\s*[KkMm]?)\s*repost/i);
if (m) reposts = parseCount(m[1]);
}
}

if (!authorName && !authorUrl && !postText) continue;

// Comment gate: the author restricted commenting to their connections
// ("Only connections can comment on this post. You can still react or
// share it."). For a 3rd+ degree account the comment editor never renders,
// so the post is uncommentable. LinkedIn paints this notice straight into
// the card chrome, so we can detect it here (Phase A) and drop the
// candidate before it ever reaches the expensive compose/post phase.
const commentGated =
/only connections can comment on this post|you can still react or share it/i
.test(fullItemText);

out.push({
post_url: activityId
? ('https://www.linkedin.com/feed/update/urn:li:' + urnType + ':' + activityId + '/')
: null,
activity_id: activityId,
all_urns: Array.from(allUrns),
author_name: authorName || null,
author_headline: authorHeadline,
author_profile_url: authorUrl,
author_followers: authorFollowers,
post_text: postText,
age_hours: ageHours,
reactions: reactions,
comments: comments,
reposts: reposts,
age_text: ageText,
comment_gated: commentGated
});
}
return JSON.stringify(out);
}
"""
|
|
573
|
+
|
|
574
|
+
_SEARCH_JS_COMPANIES = r"""
|
|
575
|
+
() => {
|
|
576
|
+
const out = [];
|
|
577
|
+
const cards = document.querySelectorAll(
|
|
578
|
+
"div.search-results-container li div.entity-result, "
|
|
579
|
+
+ "li.reusable-search__result-container, "
|
|
580
|
+
+ "[data-chameleon-result-urn]"
|
|
581
|
+
);
|
|
582
|
+
for (const c of Array.from(cards).slice(0, 25)) {
|
|
583
|
+
const link = c.querySelector(
|
|
584
|
+
"a[href*='/company/'].app-aware-link, a[href*='/company/']"
|
|
585
|
+
);
|
|
586
|
+
const url = link ? (link.href || link.getAttribute("href") || "") : "";
|
|
587
|
+
const nameEl = c.querySelector(
|
|
588
|
+
".entity-result__title-text, .entity-result__title-line, "
|
|
589
|
+
+ "span[aria-hidden='true']"
|
|
590
|
+
);
|
|
591
|
+
const name = nameEl ? (nameEl.textContent || "").trim() : "";
|
|
592
|
+
const taglineEl = c.querySelector(
|
|
593
|
+
".entity-result__primary-subtitle, .t-14.t-black.t-normal"
|
|
594
|
+
);
|
|
595
|
+
const tagline = taglineEl ? (taglineEl.textContent || "").trim() : "";
|
|
596
|
+
if (!name && !url) continue;
|
|
597
|
+
out.push({
|
|
598
|
+
company: name.replace(/\s+/g, " "),
|
|
599
|
+
tagline: tagline.replace(/\s+/g, " "),
|
|
600
|
+
company_url: url.split("?")[0],
|
|
601
|
+
});
|
|
602
|
+
}
|
|
603
|
+
return JSON.stringify(out);
|
|
604
|
+
}
|
|
605
|
+
"""
|
|
606
|
+
|
|
607
|
+
# Dispatch table: search vertical name -> in-page extractor source that
# search() hands to page.evaluate().
_SEARCH_JS_BY_VERTICAL = dict(
    people=_SEARCH_JS_PEOPLE,
    content=_SEARCH_JS_CONTENT,
    companies=_SEARCH_JS_COMPANIES,
)
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _read_devtools_port() -> Optional[int]:
|
|
615
|
+
"""Return the CDP port the linkedin-agent MCP's Chrome is listening on,
|
|
616
|
+
or None if the file is missing/unreadable/stale. Chrome writes the port
|
|
617
|
+
on line 1 of DevToolsActivePort when launched with --remote-debugging-port.
|
|
618
|
+
|
|
619
|
+
Chrome SHOULD remove the file when it exits, but doesn't always — a
|
|
620
|
+
crashed/killed Chrome leaves a stale file pointing at a port nothing's
|
|
621
|
+
listening on. We probe the port with a non-blocking TCP connect; if the
|
|
622
|
+
connection is refused, we treat the file as stale and return None so
|
|
623
|
+
callers report the cleaner mcp_not_running error rather than dragging
|
|
624
|
+
out to a noisy cdp_attach_failed."""
|
|
625
|
+
try:
|
|
626
|
+
with open(DEVTOOLS_ACTIVE_PORT) as f:
|
|
627
|
+
port = int(f.readline().strip())
|
|
628
|
+
if port <= 0:
|
|
629
|
+
return None
|
|
630
|
+
except (OSError, ValueError):
|
|
631
|
+
return None
|
|
632
|
+
import socket
|
|
633
|
+
try:
|
|
634
|
+
with socket.create_connection(("127.0.0.1", port), timeout=0.5):
|
|
635
|
+
return port
|
|
636
|
+
except (OSError, socket.timeout):
|
|
637
|
+
return None
|
|
638
|
+
|
|
639
|
+
|
|
640
|
+
def _resolve_cdp_url() -> Optional[str]:
|
|
641
|
+
"""Resolve the CDP endpoint to attach the SERP read to.
|
|
642
|
+
|
|
643
|
+
Lane 1 (preferred, 2026-05-29): LINKEDIN_CDP_URL, exported by
|
|
644
|
+
skill/lib/linkedin-backend.sh to point at the browser-harness Chrome on
|
|
645
|
+
:9556. The main run-linkedin.sh pipeline now drives that harness Chrome
|
|
646
|
+
(mcp__linkedin-harness__bh_run) instead of the retired linkedin-agent
|
|
647
|
+
MCP, so this is the live session whose cookies/fingerprint we want. We
|
|
648
|
+
probe /json/version with a 1s GET so a stale/unset env falls through
|
|
649
|
+
cleanly rather than dragging into a noisy connect failure.
|
|
650
|
+
|
|
651
|
+
Lane 2 (legacy DevToolsActivePort attach to the linkedin-agent profile
|
|
652
|
+
under PROFILE_DIR) was REMOVED 2026-05-31. It silently sent the SERP read
|
|
653
|
+
to a SECOND Chrome (the retired linkedin-agent MCP browser) whenever the
|
|
654
|
+
harness was momentarily unreachable — the "two LinkedIn browsers in
|
|
655
|
+
parallel" bug. The harness Chrome on :9556 is now the ONLY allowed target.
|
|
656
|
+
|
|
657
|
+
Returns the harness CDP base URL (e.g. "http://127.0.0.1:9556") or None
|
|
658
|
+
when LINKEDIN_CDP_URL is unset or the harness is unreachable.
|
|
659
|
+
"""
|
|
660
|
+
harness = os.environ.get("LINKEDIN_CDP_URL", "").strip()
|
|
661
|
+
if not harness:
|
|
662
|
+
return None
|
|
663
|
+
import urllib.request
|
|
664
|
+
try:
|
|
665
|
+
with urllib.request.urlopen(
|
|
666
|
+
f"{harness.rstrip('/')}/json/version", timeout=1.0
|
|
667
|
+
):
|
|
668
|
+
return harness.rstrip("/")
|
|
669
|
+
except Exception:
|
|
670
|
+
return None
|
|
671
|
+
|
|
672
|
+
|
|
673
|
+
def search(vertical: str, query: str) -> dict:
    """Attach to the harness Chrome via CDP and read one LinkedIn SERP.

    ONE goto, ONE evaluate. No own-Chrome launch, no context.close(), so we
    never write cookies back to disk. The call is gated by
    _check_rate_limit() and returns that helper's result unchanged when the
    budget check does not pass.

    Args:
        vertical: Search vertical; must be a member of SEARCH_VERTICALS.
        query: Free-text search keywords. Surrounding whitespace is stripped.

    Returns:
        A dict that always carries "ok". On failure it carries "error" (one
        of bad_vertical, empty_query, mcp_not_running, cdp_attach_failed,
        navigation_failed, session_invalid, serp_redirected, or whatever
        _check_rate_limit() reported) plus "detail" or "url". On success it
        carries the landed "url", "vertical", "query", "result_count",
        "results", the dropped_* counters and "rate_budget".

    Raises:
        Exceptions from page.evaluate() and the post-processing helpers are
        not caught here; the finally block still detaches first.
    """
    if vertical not in SEARCH_VERTICALS:
        return {
            "ok": False,
            "error": "bad_vertical",
            "detail": f"got {vertical!r}; want one of {SEARCH_VERTICALS}",
        }
    query = (query or "").strip()
    if not query:
        return {"ok": False, "error": "empty_query", "detail": ""}

    rate = _check_rate_limit()
    if not rate.get("ok"):
        return rate

    cdp_url = _resolve_cdp_url()
    if cdp_url is None:
        # _resolve_cdp_url() only consults LINKEDIN_CDP_URL, so the message
        # must not advertise the removed DevToolsActivePort lane.
        return {
            "ok": False,
            "error": "mcp_not_running",
            "detail": (
                "No LinkedIn CDP endpoint reachable. Set LINKEDIN_CDP_URL "
                "(the browser-harness Chrome on :9556, exported by "
                "skill/lib/linkedin-backend.sh) and make sure that Chrome is "
                "running."
            ),
        }

    from playwright.sync_api import sync_playwright

    _acquire_browser_lock()

    encoded = urllib.parse.quote(query)
    # Content searches sort by date_posted to match skill/run-linkedin.sh
    # Phase A behavior — fresh posts > stale ones for engagement work.
    suffix = "&sortBy=date_posted" if vertical == "content" else ""
    search_url = (
        f"https://www.linkedin.com/search/results/{vertical}/"
        f"?keywords={encoded}{suffix}"
    )
    serp_prefix = f"https://www.linkedin.com/search/results/{vertical}/"

    with sync_playwright() as p:
        try:
            browser = p.chromium.connect_over_cdp(cdp_url)
        except Exception as e:
            _log_search(query, vertical, ok=False, error="cdp_attach_failed")
            return {
                "ok": False,
                "error": "cdp_attach_failed",
                "detail": f"connect_over_cdp({cdp_url}) failed: {e}",
            }

        _bh_activity_log("attach", cdp_url)

        # Reuse the existing context (cookies / UA / fingerprint already set
        # by the Chrome we attached to). Never close it — that would kill its
        # pages too. We only own a page we create below.
        if not browser.contexts:
            # Best-effort detach, guarded exactly like the one in the finally
            # below. Playwright's documented sync Browser API offers close()
            # rather than disconnect(), so this call may raise; it must not
            # turn a clean cdp_attach_failed result into an exception.
            # Leaving the sync_playwright() block tears the connection down.
            try:
                browser.disconnect()
            except Exception:
                pass
            _log_search(query, vertical, ok=False, error="cdp_attach_failed")
            return {
                "ok": False,
                "error": "cdp_attach_failed",
                "detail": "browser.contexts is empty; MCP has no open context",
            }
        context = browser.contexts[0]

        page = None
        _reused_page = False
        try:
            # Reuse an existing harness tab instead of spawning a throwaway one
            # (mirrors reddit_browser / linkedin_browser). Prefer a tab already
            # on linkedin.com (not login/checkpoint), else the first open page;
            # only new_page() when the context has no usable tab. A reused tab
            # is left open in the finally below so the next consumer reuses it.
            for pg in context.pages:
                u = pg.url or ""
                if "linkedin.com" in u and "login" not in u and "checkpoint" not in u:
                    page, _reused_page = pg, True
                    break
            if page is None and context.pages:
                page, _reused_page = context.pages[0], True
            if page is None:
                page = context.new_page()
            try:
                page.goto(
                    search_url,
                    wait_until="domcontentloaded",
                    timeout=30000,
                )
            except Exception as e:
                _log_search(query, vertical, ok=False, error="navigation_failed")
                return {
                    "ok": False,
                    "error": "navigation_failed",
                    "detail": str(e),
                }

            # Settle: search results lazy-render after DOMContentLoaded.
            # Selectors cover the new SDUI layout (post 2026-04 rollout) AND
            # the legacy class layout, in that order.
            try:
                page.wait_for_selector(
                    "[data-sdui-screen*='SearchResultsContent'], "
                    "div.search-results-container, "
                    "main[aria-label*='Search'], "
                    "div.feed-shared-update-v2",
                    timeout=10000,
                )
            except Exception:
                pass  # extractor will return [] if nothing rendered

            # Random 2-4s human-pacing delay before reading the DOM. The new
            # SDUI layout streams cards in after the screen container exists;
            # 1-3s sometimes returned 6/8 cards. 2-4s reliably gets 8/8.
            page.wait_for_timeout(random.randint(2000, 4000))

            cur_url = page.url
            if _is_login_or_checkpoint(cur_url):
                _log_search(query, vertical, ok=False, error="session_invalid")
                return {
                    "ok": False,
                    "error": "session_invalid",
                    "url": cur_url,
                }
            # LinkedIn's anti-automation likes to redirect a refused SERP to
            # https://www.linkedin.com/ (no /login marker). Without this
            # check the extractor would run on the homepage, find nothing,
            # and we'd return ok:true with result_count:0 — masking failure
            # as an empty query. Require landing on the SERP path.
            if not cur_url.startswith(serp_prefix):
                _log_search(query, vertical, ok=False, error="serp_redirected")
                return {
                    "ok": False,
                    "error": "serp_redirected",
                    "url": cur_url,
                }

            raw = page.evaluate(_SEARCH_JS_BY_VERTICAL[vertical])
            try:
                results = json.loads(raw or "[]")
            except json.JSONDecodeError:
                results = []

            # Author exclusion (ALL verticals). Drop hard-excluded authors
            # (config.json exclusions + author_blocklist, slug-keyed) before the
            # Phase A picker, scoring, or the comment_gated logic can see them.
            # Slug is the reliable key; display-name matches are intentionally
            # soft (many real namesakes), so only "hard" verdicts drop here.
            # The helper is fail-open, so a blocklist-API hiccup can't wedge
            # discovery.
            #
            # Content cards expose author_name/author_profile_url, while
            # people cards expose name/profile_url. Fall back to the latter so
            # the people vertical is actually checked. Company cards carry
            # neither pair and pass through with (None, None).
            _excl = load_exclusions()
            before_excl = len(results)
            results = [
                r for r in results
                if classify_author(
                    r.get("author_name") or r.get("name"),
                    r.get("author_profile_url") or r.get("profile_url"),
                    _excl,
                )[0] != "hard"
            ]
            dropped_excluded = before_excl - len(results)
            if dropped_excluded:
                print(
                    f"[discover_linkedin_candidates] dropped_excluded="
                    f"{dropped_excluded} (author on exclusion list)",
                    file=sys.stderr,
                )

            dropped_comment_gated = 0
            if vertical == "content":
                # Programmatic comment-gate pre-filter (Phase A). Posts whose
                # author restricted commenting to connections-only are
                # uncommentable by a 3rd+ degree account: the comment editor
                # never renders and a full compose/post cycle ends in
                # rejected_by_platform. Before this filter ~28% of posts that
                # reached the comment stage died this way (35/123 since the
                # 2026-05-29 harness migration). The gate is visible in the
                # scraped card chrome, so we drop these here and never spend a
                # Phase B cycle on them. The like-at-comment-time backstop in
                # the posting agent still catches gates not shown in the card.
                before = len(results)
                results = [r for r in results if not r.get("comment_gated")]
                dropped_comment_gated = before - len(results)
                for r in results:
                    velocity, virality, age_clamped = calculate_velocity_score(r)
                    r["engagement_velocity"] = velocity
                    r["velocity_score"] = virality
                    r["age_hours_clamped"] = age_clamped
                # Twitter model: rank, never drop. Sort by velocity_score DESC
                # so the Phase A picker sees the strongest candidates first and
                # takes from the top; weak cards stay eligible as fallback so
                # quiet topics still yield a comment instead of zero-posting.
                results.sort(key=lambda x: x.get("velocity_score") or 0, reverse=True)

            _log_search(query, vertical, ok=True, error=None)
            return {
                "ok": True,
                "url": cur_url,
                "vertical": vertical,
                "query": query,
                "result_count": len(results),
                "dropped_below_virality_floor": 0,
                "dropped_comment_gated": dropped_comment_gated,
                "dropped_excluded": dropped_excluded,
                "virality_floor": None,
                "results": results,
                "rate_budget": {
                    "daily_used": rate.get("daily_used"),
                    "daily_cap": None,
                    "monthly_used": rate.get("monthly_used"),
                    "monthly_cap": None,
                },
            }

        finally:
            # Close ONLY a page WE created, never the context or the browser.
            # The attached Chrome keeps owning its instance and existing pages.
            # A reused tab is left open so the next consumer can reuse it.
            try:
                if page is not None and not _reused_page:
                    page.close()
            except Exception:
                pass
            try:
                browser.disconnect()
            except Exception:
                pass
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def search_with_retry(vertical: str, query: str, max_attempts: int = 2) -> dict:
    """Run search(), retrying only when the failure looks transient.

    A failure counts as transient when its error code is navigation_failed
    or its detail text mentions a closed browser target. Everything else —
    session_invalid, mcp_not_running, serp_redirected, ... — is returned
    straight away, as is any successful result. An exception raised by
    search() is converted into an {"error": "exception"} result and judged
    by the same rule.

    Args:
        vertical: Passed through to search().
        query: Passed through to search().
        max_attempts: Upper bound on calls to search(); the default of 2
            means one retry.

    Returns:
        The result of the last attempt. When more than one attempt was made
        the dict also carries "retry_attempt". If max_attempts is below 1,
        search() is never called and a "no_attempts" error is returned.
    """
    transient_markers = ("targetclosed", "target page", "browser has been closed")
    outcome: dict = {"ok": False, "error": "no_attempts"}

    for attempt in range(1, max_attempts + 1):
        try:
            outcome = search(vertical, query)
        except Exception as exc:
            outcome = {
                "ok": False,
                "error": "exception",
                "detail": f"{type(exc).__name__}: {exc}",
                "attempt": attempt,
            }

        error_code = (outcome.get("error") or "").lower()
        detail_text = (outcome.get("detail") or "").lower()
        looks_transient = (
            any(marker in detail_text for marker in transient_markers)
            or error_code == "navigation_failed"
        )

        worth_retrying = (
            not outcome.get("ok")
            and looks_transient
            and attempt < max_attempts
        )
        if not worth_retrying:
            if attempt > 1:
                outcome["retry_attempt"] = attempt
            return outcome

        reason = outcome.get('detail') or outcome.get('error')
        print(
            f"[discover_linkedin_candidates] transient failure attempt "
            f"{attempt}: {reason}; "
            f"retrying...",
            file=sys.stderr,
        )
        time.sleep(2)

    return outcome
|
|
945
|
+
|
|
946
|
+
|
|
947
|
+
def main():
    """CLI entry point: `<people|content|companies> <query...>`.

    Prints the search result as indented JSON on stdout and exits 0 when the
    result is ok, 1 otherwise. Exits 2 (message on stderr) when the caller is
    not authorized or too few arguments were given.
    """
    # Guard: only authorized pipelines may invoke this helper. Other Claude
    # subprocess planners auto-load CLAUDE.md as system context, see this
    # helper documented there, and have wandered off-task to "smoke test"
    # it — racing the linkedin profile's SingletonLock and triggering
    # server-side session invalidation. The legitimate caller sets the
    # matching env var immediately before invoking; nothing else does.
    authorized = os.environ.get("SOCIAL_AUTOPOSTER_LINKEDIN_SEARCH") == "1"
    if not authorized:
        refusal = {
            "ok": False,
            "error": "unauthorized_caller",
            "detail": (
                "discover_linkedin_candidates.py is invoked only by the "
                "run-linkedin Phase A discovery pipeline. Set "
                "SOCIAL_AUTOPOSTER_LINKEDIN_SEARCH=1 from the caller if "
                "this invocation is legitimate."
            ),
        }
        print(json.dumps(refusal), file=sys.stderr)
        sys.exit(2)

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        usage = (
            "Usage: discover_linkedin_candidates.py "
            "<people|content|companies> <query>"
        )
        print(usage, file=sys.stderr)
        sys.exit(2)

    # Everything after the vertical is the query, so unquoted multi-word
    # queries work.
    vertical, *query_words = cli_args
    outcome = search_with_retry(vertical, " ".join(query_words))
    print(json.dumps(outcome, indent=2))
    sys.exit(0 if outcome.get("ok") else 1)


if __name__ == "__main__":
    main()
|