linkedin-agent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkedin_agent_cli-0.1.0.dist-info/METADATA +197 -0
- linkedin_agent_cli-0.1.0.dist-info/RECORD +34 -0
- linkedin_agent_cli-0.1.0.dist-info/WHEEL +4 -0
- linkedin_agent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- linkedin_agent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- linkedin_cli/__init__.py +9 -0
- linkedin_cli/actions/__init__.py +0 -0
- linkedin_cli/actions/connect.py +118 -0
- linkedin_cli/actions/conversations.py +132 -0
- linkedin_cli/actions/message.py +153 -0
- linkedin_cli/actions/profile.py +22 -0
- linkedin_cli/actions/search.py +186 -0
- linkedin_cli/actions/status.py +112 -0
- linkedin_cli/api/__init__.py +0 -0
- linkedin_cli/api/client.py +182 -0
- linkedin_cli/api/messaging/__init__.py +11 -0
- linkedin_cli/api/messaging/conversations.py +56 -0
- linkedin_cli/api/messaging/send.py +74 -0
- linkedin_cli/api/messaging/utils.py +24 -0
- linkedin_cli/api/voyager.py +319 -0
- linkedin_cli/auth.py +98 -0
- linkedin_cli/browser/__init__.py +0 -0
- linkedin_cli/browser/login.py +140 -0
- linkedin_cli/browser/nav.py +115 -0
- linkedin_cli/cli.py +396 -0
- linkedin_cli/conf.py +33 -0
- linkedin_cli/enums.py +11 -0
- linkedin_cli/exceptions.py +47 -0
- linkedin_cli/launcher.py +60 -0
- linkedin_cli/page_state.py +148 -0
- linkedin_cli/session.py +169 -0
- linkedin_cli/setup/__init__.py +0 -0
- linkedin_cli/setup/self_profile.py +25 -0
- linkedin_cli/url_utils.py +30 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# linkedin_cli/browser/login.py
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
|
6
|
+
from playwright.sync_api import sync_playwright
|
|
7
|
+
from playwright_stealth import Stealth
|
|
8
|
+
from termcolor import colored
|
|
9
|
+
|
|
10
|
+
from linkedin_cli.browser.nav import goto_page, human_type, resolve_locator
|
|
11
|
+
from linkedin_cli.conf import (
|
|
12
|
+
BROWSER_DEFAULT_TIMEOUT_MS,
|
|
13
|
+
BROWSER_LOGIN_TIMEOUT_MS,
|
|
14
|
+
BROWSER_SLOW_MO,
|
|
15
|
+
CHECKPOINT_RESOLVE_TIMEOUT_S,
|
|
16
|
+
)
|
|
17
|
+
from linkedin_cli.page_state import PageState, classify_page
|
|
18
|
+
|
|
19
|
+
CHECKPOINT_POLL_S = 5
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
LINKEDIN_LOGIN_URL = "https://www.linkedin.com/login"
|
|
24
|
+
|
|
25
|
+
# Each table below is an ordered list of "locator factories": callables that
# take a Playwright page and return a locator. submit_login_form() hands the
# email/password/submit tables to resolve_locator(), which tries the entries
# in order and uses the first one that becomes visible, so the earlier entries
# are the preferred ones and the later entries act as fallbacks.

# Candidates for the username (email or phone) input of the login form.
EMAIL_LOCATORS = [
    lambda p: p.get_by_role("textbox", name="Email or phone"),
    lambda p: p.get_by_label("Email or phone"),
    lambda p: p.locator('input[autocomplete="webauthn"]'),
    lambda p: p.locator('input[name="session_key"]'),
    lambda p: p.locator('input#username'),
    lambda p: p.locator('form input[type="text"]'),
]

# Candidates for the password input of the login form.
PASSWORD_LOCATORS = [
    lambda p: p.locator('input[type="password"]'),
    lambda p: p.locator('input[autocomplete="current-password"]'),
    lambda p: p.get_by_role("textbox", name="Password"),
    lambda p: p.get_by_label("Password"),
    lambda p: p.locator('input[name="session_password"]'),
    lambda p: p.locator('input#password'),
]

# Candidates for the "Sign in" button; form-scoped matches come before
# page-wide ones.
SUBMIT_LOCATORS = [
    lambda p: p.locator("form").get_by_role("button", name="Sign in", exact=True),
    lambda p: p.get_by_role("button", name="Sign in", exact=True),
    lambda p: p.locator('form button[type="submit"]'),
    lambda p: p.locator('button[type="submit"]'),
]

# Candidates for the "Agree to comply" button probed by dismiss_comply_gate().
COMPLY_LOCATORS = [
    lambda p: p.locator('button#content__button--primary--muted'),
    lambda p: p.get_by_role("button", name="Agree to comply", exact=True),
    lambda p: p.locator('button.content__button--primary'),
]

# Default wait, in milliseconds, that dismiss_comply_gate() grants EACH
# candidate in COMPLY_LOCATORS (so a full miss costs up to 3x this value).
COMPLY_PROBE_TIMEOUT_MS = 5000
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def dismiss_comply_gate(page, timeout_ms: int = COMPLY_PROBE_TIMEOUT_MS) -> bool:
    """Click the 'Agree to comply' button if one shows up.

    Every factory in ``COMPLY_LOCATORS`` is probed in order and each probe
    waits up to *timeout_ms* for its button to become visible, so the timeout
    is per candidate, not for the whole call.

    Returns:
        True if a button became visible and was clicked, False if every
        candidate timed out.
    """
    for make_button in COMPLY_LOCATORS:
        button = make_button(page).first
        try:
            button.wait_for(state="visible", timeout=timeout_ms)
        except PlaywrightTimeoutError:
            # This candidate never appeared; fall through to the next one.
            pass
        else:
            logger.info(colored("Dismissing 'Agree to comply' interstitial", "yellow"))
            button.click()
            return True
    return False
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def await_checkpoint_clear(page, timeout_s: int = CHECKPOINT_RESOLVE_TIMEOUT_S) -> bool:
    """Block while the user clears a LinkedIn checkpoint in the live browser.

    The browser runs headed (noVNC at http://localhost:6080/vnc.html), so the
    user can solve the challenge by hand. We never resubmit credentials —
    every automated retry hardens the block; the only escape is a human.

    The page is classified at least once, then every ``CHECKPOINT_POLL_S``
    seconds, and one final time when the deadline is reached. The last sleep
    is shortened so the call does not overrun *timeout_s* by a poll interval.

    Args:
        page: Page object passed to ``classify_page``; its ``url`` is logged.
        timeout_s: Maximum number of seconds to wait. Zero or a negative value
            still performs a single check.

    Returns:
        True once ``classify_page`` reports anything other than
        ``PageState.CHECKPOINT``; False if the page is still a checkpoint when
        the deadline passes.
    """
    banner = "*" * 64
    logger.error(colored(banner, "red", attrs=["bold"]))
    logger.error(colored(" RESOLVE CHECKPOINT ".center(64, "*"), "red", attrs=["bold"]))
    logger.error(colored(banner, "red", attrs=["bold"]))
    logger.error(
        colored(
            "Clear the challenge by hand in the live browser:",
            "red", attrs=["bold"],
        )
    )
    logger.error("Open the browser here: http://localhost:6080/vnc.html")
    logger.error(f"Checkpoint URL: {page.url}")
    logger.error(colored(banner, "red", attrs=["bold"]))

    deadline = time.monotonic() + timeout_s
    while True:
        # Check first, sleep second: a checkpoint cleared during the last
        # sleep (or before a zero-length wait) must still be noticed.
        if classify_page(page) is not PageState.CHECKPOINT:
            logger.info(colored("Checkpoint cleared — continuing", "green", attrs=["bold"]))
            return True
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return False
        # Never sleep past the deadline.
        time.sleep(min(CHECKPOINT_POLL_S, remaining))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def submit_login_form(session, username, password):
    """Open the LinkedIn login page, type the credentials and press submit.

    The outcome is deliberately not checked here: the caller inspects the
    page afterwards to tell the feed, a checkpoint, or a bounced login apart.

    Args:
        session: Object exposing ``page`` and ``wait()``; also handed to
            ``goto_page``.
        username: Text typed into the email/phone field.
        password: Text typed into the password field.
    """
    page = session.page
    logger.info(colored("Submitting login form", "cyan") + f" for {session}")

    def open_login_page():
        return page.goto(LINKEDIN_LOGIN_URL)

    goto_page(
        session,
        action=open_login_page,
        expected_url_pattern="/login",
        error_message="Failed to load login page",
    )

    # Fill both fields the same way: locate, type, then pace.
    for field_candidates, value in (
        (EMAIL_LOCATORS, username),
        (PASSWORD_LOCATORS, password),
    ):
        field = resolve_locator(page, field_candidates)
        human_type(field, value)
        session.wait()

    submit_button = resolve_locator(page, SUBMIT_LOCATORS)
    submit_button.click()
    # The return value is intentionally ignored: the gate is optional.
    dismiss_comply_gate(page)
    page.wait_for_load_state("domcontentloaded", timeout=BROWSER_LOGIN_TIMEOUT_MS)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def launch_browser(storage_state=None):
    """Start Playwright, launch a headed Chromium and open one page.

    The context gets ``BROWSER_DEFAULT_TIMEOUT_MS`` as both its default and
    its navigation timeout, and stealth patches are applied to it before the
    page is created.

    Args:
        storage_state: Passed straight to ``browser.new_context``.

    Returns:
        A ``(page, context, browser, playwright)`` tuple. The caller owns all
        four and is responsible for closing/stopping them.

    Raises:
        Whatever Playwright raises during setup. In that case the browser (if
        it was launched) is closed and the Playwright driver is stopped before
        the exception propagates, so a failed launch leaves nothing running.
    """
    logger.debug("Launching Playwright")
    playwright = sync_playwright().start()
    browser = None
    try:
        browser = playwright.chromium.launch(headless=False, slow_mo=BROWSER_SLOW_MO)
        context = browser.new_context(storage_state=storage_state)
        context.set_default_timeout(BROWSER_DEFAULT_TIMEOUT_MS)
        context.set_default_navigation_timeout(BROWSER_DEFAULT_TIMEOUT_MS)
        Stealth().apply_stealth_sync(context)
        page = context.new_page()
    except BaseException:
        # Cleanup-and-reraise: without this the driver process started above
        # would outlive a failed setup, since the caller never gets a handle.
        try:
            if browser is not None:
                browser.close()
        finally:
            playwright.stop()
        raise
    return page, context, browser, playwright
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# linkedin_cli/browser/nav.py
|
|
2
|
+
import logging
|
|
3
|
+
import random
|
|
4
|
+
from urllib.parse import unquote, urlparse, urljoin
|
|
5
|
+
|
|
6
|
+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
|
7
|
+
|
|
8
|
+
from linkedin_cli.conf import BROWSER_NAV_TIMEOUT_MS, DUMP_PAGES, FIXTURE_PAGES_DIR, HUMAN_TYPE_MIN_DELAY_MS, HUMAN_TYPE_MAX_DELAY_MS
|
|
9
|
+
from linkedin_cli.exceptions import CheckpointChallengeError, SkipProfile
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def goto_page(session,
              action,
              expected_url_pattern: str,
              timeout: int = BROWSER_NAV_TIMEOUT_MS,
              error_message: str = "",
              ):
    """Run a navigation *action* and verify where the page ended up.

    Args:
        session: Object exposing ``page`` and ``wait()``.
        action: Zero-argument callable that triggers the navigation.
        expected_url_pattern: Substring that must occur in the URL-decoded
            page URL once navigation settles.
        timeout: Milliseconds to wait for the URL to match.
        error_message: Prefix for the ``RuntimeError`` message.

    Raises:
        SkipProfile: The final URL lacks the pattern and contains ``/404``.
        RuntimeError: The final URL lacks the pattern for any other reason.
    """
    page = session.page
    action()
    if not page:
        return

    def url_matches(url):
        return expected_url_pattern in unquote(url)

    try:
        page.wait_for_url(url_matches, timeout=timeout)
    except PlaywrightTimeoutError:
        # Not fatal by itself: the URL is re-checked explicitly below.
        pass

    session.wait()

    current = unquote(page.url)
    if expected_url_pattern in current:
        logger.debug("Navigated to %s", page.url)
        return

    if "/404" in current:
        raise SkipProfile(f"Profile returned 404 → {current}")
    raise RuntimeError(f"{error_message} → expected '{expected_url_pattern}' | got '{current}'")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def extract_in_urls(page):
    """Collect the distinct ``/in/`` profile URLs linked from the current page.

    Each ``href`` is resolved against ``page.url``, stripped of its query
    string and fragment, and kept only if ``url_to_public_id`` returns a
    truthy value for it. First-seen order is preserved.

    Returns:
        A list of cleaned absolute URLs without duplicates.
    """
    from linkedin_cli.url_utils import url_to_public_id

    # A dict doubles as an insertion-ordered set.
    collected = {}
    for anchor in page.locator('a[href*="/in/"]').all():
        href = anchor.get_attribute("href")
        if not href or "/in/" not in href:
            continue
        absolute = urljoin(page.url, href.strip())
        canonical = urlparse(absolute)._replace(query="", fragment="").geturl()
        if not url_to_public_id(canonical):
            continue
        collected.setdefault(canonical, None)

    urls = list(collected)
    logger.debug(f"Extracted {len(urls)} unique /in/ profiles")
    return urls
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def find_first_visible(page, selectors: list[str]):
    """Return the first element of the first selector that matches anything.

    Selectors are tried in order. A selector "matches" when its locator has a
    non-zero ``count()``; despite the function name, visibility is not tested.

    Returns:
        ``locator.first`` for the first matching selector, or None when no
        selector matches (including an empty *selectors* list).
    """
    # Lazy generator: locators are built and counted one at a time, stopping
    # at the first hit.
    located = (page.locator(css) for css in selectors)
    return next((hit.first for hit in located if hit.count() > 0), None)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def resolve_locator(page, candidates, timeout_per_ms: int = 5000):
    """Return the first candidate locator that becomes visible.

    Args:
        page: Page handed to every factory in *candidates*.
        candidates: Ordered iterable of callables ``page -> locator``.
        timeout_per_ms: Milliseconds each candidate is given to become
            visible (per candidate, not in total).

    Raises:
        CheckpointChallengeError: Nothing became visible and the page URL
            contains ``/checkpoint/``.
        RuntimeError: Nothing became visible on any other page.
    """
    for build in candidates:
        candidate = build(page).first
        try:
            candidate.wait_for(state="visible", timeout=timeout_per_ms)
        except PlaywrightTimeoutError:
            continue
        else:
            return candidate

    # Every candidate timed out: distinguish a checkpoint from a plain miss.
    if "/checkpoint/" in page.url:
        raise CheckpointChallengeError(page.url)
    raise RuntimeError(f"No locator matched on {page.url}")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# CSS selectors for a profile page's "top card" section, in the order
# find_top_card() tries them via find_first_visible(); the first selector
# with at least one match wins.
TOP_CARD_SELECTORS = [
    'section:has(div.top-card-background-hero-image)',
    'section[data-member-id]',
    'section.artdeco-card:has(> div.pv-top-card)',
    'section:has(> div[class*="pv-top-card"])',
    'section[componentkey*="com.linkedin.sdui.profile.card"]',
]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def find_top_card(session):
    """Locate the profile top-card section on the session's current page.

    Returns:
        The locator returned by ``find_first_visible`` for
        ``TOP_CARD_SELECTORS``.

    Raises:
        SkipProfile: No selector matched; a warning with the page URL is
            logged first.
    """
    card = find_first_visible(session.page, TOP_CARD_SELECTORS)
    if card is not None:
        return card
    logger.warning("Top card not found on %s", session.page.url)
    raise SkipProfile("Top Card section not found")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def human_type(locator, text: str, min_delay: int = HUMAN_TYPE_MIN_DELAY_MS, max_delay: int = HUMAN_TYPE_MAX_DELAY_MS):
    """Type *text* into *locator* with a randomly chosen key delay.

    One delay (in milliseconds, inclusive range ``[min_delay, max_delay]``) is
    drawn per call and passed to ``locator.type`` for the whole string; the
    delay varies between calls, not between individual keystrokes.

    NOTE(review): if per-keystroke jitter is the intent, this needs to type
    character by character — confirm against the desired behaviour.
    """
    key_delay_ms = random.randint(min_delay, max_delay)
    locator.type(text, delay=key_delay_ms)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def dump_page_html(session: "LinkedInSession", profile: dict, category: str = "connect"):
    """Save the current page's HTML as a fixture when ``DUMP_PAGES`` is on.

    The file is written to ``FIXTURE_PAGES_DIR/<category>/<id>.html`` where
    ``<id>`` is ``profile.get('public_identifier')``; the directory is created
    on demand. Does nothing when ``DUMP_PAGES`` is falsy.
    """
    if not DUMP_PAGES:
        return

    target_dir = FIXTURE_PAGES_DIR / category
    target_dir.mkdir(parents=True, exist_ok=True)
    filepath = target_dir / f"{profile.get('public_identifier')}.html"

    snapshot = session.page.content()
    with open(filepath, mode="w", encoding="utf-8") as handle:
        handle.write(snapshot)
    logger.info("Saved page snapshot → %s", filepath)
|
linkedin_cli/cli.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
"""linkedin-cli — drive LinkedIn interactions inside a bound browser session.
|
|
2
|
+
|
|
3
|
+
``session open`` launches + binds a persistent browser (the session owner); the
|
|
4
|
+
verbs connect to it and drive LinkedIn. One session = one account; pick it with
|
|
5
|
+
``--session <name>`` (or ``$LINKEDIN_CLI_SESSION``).
|
|
6
|
+
|
|
7
|
+
Output contract — design decisions, kept here so they travel with the package:
|
|
8
|
+
|
|
9
|
+
* **Every verb produces a dict** — its canonical result. That one dict is both
|
|
10
|
+
the ``--json`` payload and the source the human renderer summarises, so the
|
|
11
|
+
two views can never drift.
|
|
12
|
+
* **Human-readable by default; ``--json`` on every verb for the full dict.**
|
|
13
|
+
Per clig.dev ("humans first", "keep it brief, err toward less output"), the
|
|
14
|
+
default is a short, scannable per-verb summary (``status`` → ``Connected``,
|
|
15
|
+
``profile`` → a few lines); ``--json`` emits the whole dict for machines.
|
|
16
|
+
* **No ``--out``/file flag — print to stdout, let the caller redirect.** To save
|
|
17
|
+
a result: ``linkedin-cli profile alice --json > alice.json``. This matches the
|
|
18
|
+
composability convention (clig.dev; ``kubectl -o``, ``aws --output``,
|
|
19
|
+
``gh --json``) and keeps the tool free of file-lifecycle concerns.
|
|
20
|
+
* **stdout carries only the result; logs and errors go to stderr.** Errors are an
|
|
21
|
+
``error: <type>: <message>`` line + non-zero exit (``type`` mirrors
|
|
22
|
+
``exceptions.py``). A verb that ran is exit 0 — ``message`` reports send success
|
|
23
|
+
in its dict (``sent``), not via the exit code.
|
|
24
|
+
|
|
25
|
+
This module is the composition root: it owns policy (e.g. interaction pacing)
|
|
26
|
+
and injects it into the session — the session/action layers read no config.
|
|
27
|
+
"""
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import argparse
|
|
31
|
+
import json
|
|
32
|
+
import logging
|
|
33
|
+
import os
|
|
34
|
+
import signal
|
|
35
|
+
import sys
|
|
36
|
+
|
|
37
|
+
from linkedin_cli.enums import ProfileState
|
|
38
|
+
from linkedin_cli.exceptions import (
|
|
39
|
+
AuthenticationError,
|
|
40
|
+
CheckpointChallengeError,
|
|
41
|
+
ProfileInaccessibleError,
|
|
42
|
+
ReachedConnectionLimit,
|
|
43
|
+
SkipProfile,
|
|
44
|
+
)
|
|
45
|
+
from linkedin_cli.session import PlaywrightCliSession, linkedin_cli_home, read_session
|
|
46
|
+
from linkedin_cli.url_utils import public_id_to_url, url_to_public_id
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger("linkedin_cli")
|
|
49
|
+
|
|
50
|
+
# Pacing policy lives here (the composition root), injected into the session.
# _run_verb() passes these as min_pace / max_pace to PlaywrightCliSession.
DEFAULT_MIN_PACE_S = 5.0
DEFAULT_MAX_PACE_S = 8.0

# Exception → contract error `type`, in match order.
# _error_type() returns the name of the FIRST entry whose class matches via
# isinstance(), so the order of the entries matters if any of these classes
# subclass one another.
_ERROR_TYPES = [
    (CheckpointChallengeError, "checkpoint_challenge"),
    (AuthenticationError, "authentication"),
    (ProfileInaccessibleError, "profile_inaccessible"),
    (SkipProfile, "skip_profile"),
    (ReachedConnectionLimit, "connection_limit"),
]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ── output helpers ─────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
def _out(text: str) -> None:
|
|
67
|
+
"""Print a result line to stdout (the only thing that touches stdout)."""
|
|
68
|
+
sys.stdout.write(f"{text}\n")
|
|
69
|
+
sys.stdout.flush()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _err(text: str) -> None:
|
|
73
|
+
"""Print a log/error line to stderr."""
|
|
74
|
+
print(text, file=sys.stderr)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _error_type(exc: Exception) -> str | None:
    """Map *exc* to its contract error name.

    Returns the name paired with the first class in ``_ERROR_TYPES`` that
    *exc* is an instance of, or None when no entry matches.
    """
    matches = (label for klass, label in _ERROR_TYPES if isinstance(exc, klass))
    return next(matches, None)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _self_block(profile: dict) -> dict:
|
|
85
|
+
return {
|
|
86
|
+
"public_identifier": profile.get("public_identifier"),
|
|
87
|
+
"urn": profile.get("urn"),
|
|
88
|
+
"full_name": profile.get("full_name"),
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ── human-readable rendering (the non-`--json` default) ─────────────
|
|
93
|
+
#
|
|
94
|
+
# clig.dev: "keep it brief", "err toward less output". Each verb gets a short,
|
|
95
|
+
# scannable summary of its result dict; `--json` always emits the full dict.
|
|
96
|
+
|
|
97
|
+
def _human_identity(result: dict) -> str:
|
|
98
|
+
member = result.get("self", result)
|
|
99
|
+
return f"{member.get('full_name')} ({member.get('public_identifier')})"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _human_state(result: dict) -> str:
|
|
103
|
+
return result.get("state", "")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _human_sent(result: dict) -> str:
|
|
107
|
+
return "sent" if result.get("sent") else "not sent"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _human_profile(result: dict) -> str:
|
|
111
|
+
industry = result.get("industry") or {}
|
|
112
|
+
subtitle = " · ".join(x for x in (
|
|
113
|
+
result.get("location_name"),
|
|
114
|
+
industry.get("name") if isinstance(industry, dict) else None,
|
|
115
|
+
) if x)
|
|
116
|
+
lines = [" — ".join(x for x in (result.get("full_name"), result.get("headline")) if x)]
|
|
117
|
+
if subtitle:
|
|
118
|
+
lines.append(subtitle)
|
|
119
|
+
lines.append(f"{len(result.get('positions') or [])} positions · "
|
|
120
|
+
f"{len(result.get('educations') or [])} schools")
|
|
121
|
+
lines.append("(--json for the full record)")
|
|
122
|
+
return "\n".join(lines)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _human_thread(result: dict) -> str:
|
|
126
|
+
messages = result.get("messages")
|
|
127
|
+
if not messages:
|
|
128
|
+
return "(no conversation)"
|
|
129
|
+
return "\n".join(
|
|
130
|
+
f"{m.get('timestamp', '')} {m.get('sender', '')}: {m.get('text', '')}"
|
|
131
|
+
for m in messages
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _human_search(result: dict) -> str:
|
|
136
|
+
profiles = result.get("profiles") or []
|
|
137
|
+
if not profiles:
|
|
138
|
+
return "(no results)"
|
|
139
|
+
header = f"{len(profiles)} result(s) on page {result.get('page', 1)}:"
|
|
140
|
+
return "\n".join([header] + [f" {p['public_identifier']}" for p in profiles])
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _human_closed(result: dict) -> str:
|
|
144
|
+
return f"closed {result.get('name')}"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# Command name → function that turns that command's result dict into the
# short human-readable summary. _render() looks the command up here when
# --json is not given and falls back to "key: value" lines for commands
# that have no entry.
_HUMAN = {
    "login": _human_identity,
    "whoami": _human_identity,
    "status": _human_state,
    "connect": _human_state,
    "message": _human_sent,
    "profile": _human_profile,
    "thread": _human_thread,
    "search": _human_search,
    "session-close": _human_closed,
}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _render(command: str, result: dict, as_json: bool) -> None:
    """Write *result* to stdout, as JSON or as a brief summary.

    With *as_json* the whole dict is dumped (non-ASCII kept, values that are
    not JSON-serialisable stringified). Otherwise the renderer registered for
    *command* in ``_HUMAN`` is used, falling back to ``key: value`` lines.
    """
    if as_json:
        payload = json.dumps(result, ensure_ascii=False, default=str)
    else:
        summarise = _HUMAN.get(command)
        if summarise:
            payload = summarise(result)
        else:
            payload = "\n".join(f"{key}: {value}" for key, value in result.items())
    _out(payload)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _handle_to_profile(handle: str) -> dict:
    """Turn a ``<url|id>`` handle into a minimal ``{public_identifier, url}`` dict.

    A handle containing ``/`` is treated as a URL and run through
    ``url_to_public_id``; anything else is taken as the identifier itself.

    Raises:
        ValueError: No (non-empty) public identifier could be derived.
    """
    if "/" in handle:
        public_id = url_to_public_id(handle)
    else:
        public_id = handle

    if public_id:
        return {"public_identifier": public_id, "url": public_id_to_url(public_id)}
    raise ValueError(f"Could not resolve a public identifier from {handle!r}")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _scrape(session, handle: str) -> dict:
    """Scrape the member behind *handle* and return the parsed profile.

    Used by the verbs that need the target's ``urn`` (message/thread).

    Raises:
        ProfileInaccessibleError: ``scrape_profile`` returned a falsy profile.
    """
    from linkedin_cli.actions.profile import scrape_profile

    target = _handle_to_profile(handle)
    scraped, _unused_raw = scrape_profile(session, target)
    if scraped:
        return scraped
    raise ProfileInaccessibleError(handle)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# ── verbs ──────────────────────────────────────────────────────────
|
|
189
|
+
|
|
190
|
+
def _verb_login(session, args) -> dict:
    """Authenticate the session, then report the account and logged-in member."""
    from linkedin_cli.auth import authenticate

    authenticate(session)
    result = {"account": args.name}
    result["self"] = _self_block(session.self_profile)
    return result
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _verb_whoami(session, args) -> dict:
    """Report the identity fields of the session's own profile."""
    me = _self_block(session.self_profile)
    return {"self": me}
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _verb_profile(session, args) -> dict:
    """Scrape the member named by ``args.handle`` and return the profile dict.

    With ``args.raw`` the second value returned by ``scrape_profile`` is
    attached under ``_raw``. The returned dict is a copy of the scraped one.

    Raises:
        ProfileInaccessibleError: ``scrape_profile`` returned a falsy profile.
    """
    from linkedin_cli.actions.profile import scrape_profile

    target = _handle_to_profile(args.handle)
    parsed, raw_blob = scrape_profile(session, target)
    if not parsed:
        raise ProfileInaccessibleError(args.handle)

    record = dict(parsed)
    if args.raw:
        record.update(_raw=raw_blob)
    return record
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _verb_status(session, args) -> dict:
    """Look up the connection state with the member named by ``args.handle``."""
    from linkedin_cli.actions.status import get_connection_status

    target = _handle_to_profile(args.handle)
    connection = get_connection_status(session, target)
    return dict(public_identifier=target["public_identifier"], state=connection.value)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _verb_connect(session, args) -> dict:
    """Send a connection request unless one is unnecessary.

    The current state is checked first; when it is already ``CONNECTED`` or
    ``PENDING`` nothing is sent and that state is reported. Otherwise the
    state returned by ``send_connection_request`` is reported.
    """
    from linkedin_cli.actions.connect import send_connection_request
    from linkedin_cli.actions.status import get_connection_status

    target = _handle_to_profile(args.handle)
    outcome = get_connection_status(session, target)
    nothing_to_do = outcome in (ProfileState.CONNECTED, ProfileState.PENDING)
    if not nothing_to_do:
        outcome = send_connection_request(session, target)
    return dict(public_identifier=target["public_identifier"], state=outcome.value)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _verb_message(session, args) -> dict:
    """Scrape the recipient, send ``args.text`` and report the send result.

    Success or failure is carried in the returned dict's ``sent`` value.
    """
    from linkedin_cli.actions.message import send_raw_message

    recipient = _scrape(session, args.handle)
    delivered = send_raw_message(session, recipient, args.text)
    return dict(public_identifier=recipient.get("public_identifier"), sent=delivered)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _verb_thread(session, args) -> dict:
    """Fetch the conversation between the session's member and ``args.handle``."""
    from linkedin_cli.actions.conversations import get_conversation

    peer = _scrape(session, args.handle)
    peer_urn = peer.get("urn")
    own_urn = session.self_profile["urn"]
    history = get_conversation(session, peer_urn, own_urn)
    return dict(public_identifier=peer.get("public_identifier"), messages=history)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _verb_search(session, args) -> dict:
    """Run a people search and return whatever ``search_people`` returns.

    ``args.network`` labels are translated through ``NETWORK_CODES``; when no
    label was given the ``network`` filter is passed as None.
    """
    from linkedin_cli.actions.search import NETWORK_CODES, search_people

    requested = args.network or []
    degree_codes = [NETWORK_CODES[label] for label in requested]
    network_filter = degree_codes if degree_codes else None
    return search_people(session, args.keywords, page=args.page, network=network_filter)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
# Verb name → handler. Each handler is called by _run_verb() as
# handler(session, args) and returns the verb's result dict. The keys match
# the sub-command names registered in build_parser() (main() copies the
# parsed command into args.verb before dispatch).
_VERBS = {
    "login": _verb_login,
    "whoami": _verb_whoami,
    "profile": _verb_profile,
    "status": _verb_status,
    "connect": _verb_connect,
    "message": _verb_message,
    "thread": _verb_thread,
    "search": _verb_search,
}
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# ── session lifecycle commands ─────────────────────────────────────
|
|
268
|
+
|
|
269
|
+
def _cmd_session_open(args) -> int:
    """Open the bound session named ``args.name``.

    The browser profile directory is ``<linkedin_cli_home>/profiles/<name>``.
    Returns 0 once ``open_bound_session`` returns.
    """
    from linkedin_cli.launcher import open_bound_session

    browser_profile = linkedin_cli_home() / "profiles" / args.name
    open_bound_session(args.name, profile_dir=str(browser_profile))
    return 0
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _cmd_session_close(args) -> int:
    """Signal the launcher of session ``args.name`` to shut down.

    Sends ``SIGTERM`` to the pid stored in the session record and renders a
    ``{"name": ..., "closed": True}`` result.

    Returns:
        0 when the signal was delivered; 2 (with an ``error: usage: ...`` line
        on stderr) when there is no session record, or when the record is
        stale because its process no longer exists.
    """
    record = read_session(args.name)
    if not record:
        _err(f"error: usage: no open session named {args.name!r}")
        return 2

    pid = record["pid"]
    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # The launcher died without removing its record. Report it through
        # the normal error channel instead of crashing with a traceback.
        _err(f"error: usage: session {args.name!r} is not running "
             f"(stale record for pid {pid})")
        return 2

    _render("session-close", {"name": args.name, "closed": True}, args.json)
    return 0
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# ── verb runner ────────────────────────────────────────────────────
|
|
288
|
+
|
|
289
|
+
def _run_verb(args) -> int:
    """Attach to the named session, run ``args.verb`` and render its result.

    Credentials are read from ``$LINKEDIN_USERNAME`` / ``$LINKEDIN_PASSWORD``
    and the pacing defaults of this module are injected into the session. The
    session is always closed afterwards.

    Returns:
        0 when the verb ran, 1 when it raised an exception listed in
        ``_ERROR_TYPES`` (reported as ``error: <type>: <message>`` on stderr),
        2 when no session record exists. Any other exception propagates.
    """
    record = read_session(args.name)
    if not record:
        _err(f"error: usage: no open session named {args.name!r} — run "
             f"`linkedin-cli session open --session {args.name}`")
        return 2

    environment = os.environ
    session = PlaywrightCliSession(
        record["endpoint"],
        min_pace=DEFAULT_MIN_PACE_S,
        max_pace=DEFAULT_MAX_PACE_S,
        username=environment.get("LINKEDIN_USERNAME"),
        password=environment.get("LINKEDIN_PASSWORD"),
        name=args.name,
    )
    try:
        session.ensure_browser()
        handler = _VERBS[args.verb]
        _render(args.verb, handler(session, args), args.json)
    except Exception as exc:  # noqa: BLE001 — map known errors, re-raise the rest
        label = _error_type(exc)
        if label is None:
            raise
        _err(f"error: {label}: {exc}")
        return 1
    else:
        return 0
    finally:
        session.close()
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# ── parser ─────────────────────────────────────────────────────────
|
|
319
|
+
|
|
320
|
+
def build_parser() -> argparse.ArgumentParser:
    """Assemble the ``linkedin-cli`` argument parser.

    Every leaf command inherits ``--session``/``--name`` (stored as ``name``,
    defaulting to ``$LINKEDIN_CLI_SESSION`` or ``'default'``) and ``--json``.
    The top-level command is stored in ``cmd``; for ``session`` the
    sub-command is stored in ``subcmd``.
    """
    shared = argparse.ArgumentParser(add_help=False)
    shared.add_argument(
        "--session",
        "--name",
        dest="name",
        default=os.environ.get("LINKEDIN_CLI_SESSION", "default"),
        help="Bound session name (default: $LINKEDIN_CLI_SESSION or 'default')",
    )
    shared.add_argument(
        "--json",
        action="store_true",
        help="Emit the full result as JSON instead of a human-readable summary",
    )

    def add_command(group, name, help_text):
        # Every leaf command carries the shared --session/--json options.
        return group.add_parser(name, parents=[shared], help=help_text)

    root = argparse.ArgumentParser(prog="linkedin-cli", description=__doc__)
    commands = root.add_subparsers(dest="cmd", required=True)

    # session open / close
    lifecycle = commands.add_parser("session", help="Manage the bound browser session")
    lifecycle_cmds = lifecycle.add_subparsers(dest="subcmd", required=True)
    add_command(lifecycle_cmds, "open", "Launch + bind a persistent browser, then block")
    add_command(lifecycle_cmds, "close", "Signal the session launcher to shut down")

    # verbs
    handle_help = "Profile URL or public identifier (e.g. alice-smith)"

    add_command(commands, "login",
                "Log the session in (fill the form, clear a checkpoint) and report the logged-in member")
    add_command(commands, "whoami",
                "Report who the session is logged in as — no login, no checkpoint")

    profile_cmd = add_command(
        commands, "profile",
        "Scrape a member's full profile: headline, positions, education, location")
    profile_cmd.add_argument("handle", help=handle_help)
    profile_cmd.add_argument("--raw", action="store_true",
                             help="Also emit the untouched Voyager blob under _raw")

    # Verbs whose only extra argument is the target handle.
    handle_only_verbs = (
        ("status", "Report the connection state with the member: Connected, Pending, or Qualified"),
        ("connect", "Send a connection request (no note); no-op if already Connected or Pending"),
        ("thread", "Dump the conversation with the member as a list of messages (newest last)"),
    )
    for verb, summary in handle_only_verbs:
        add_command(commands, verb, summary).add_argument("handle", help=handle_help)

    message_cmd = add_command(commands, "message", "Send a direct message to the member")
    message_cmd.add_argument("handle", help=handle_help)
    message_cmd.add_argument("--text", required=True, help="Message body to send")

    search_cmd = add_command(commands, "search",
                             "Search People by keyword; list matching profile handles")
    search_cmd.add_argument("keywords", help="Search keywords, e.g. 'San Francisco'")
    search_cmd.add_argument("--network", action="append", choices=["first", "second", "third"],
                            help="Filter by connection degree (repeatable): first / second / third")
    search_cmd.add_argument("--page", type=int, default=1, help="Result page (default: 1)")
    return root
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def _configure_logging() -> None:
|
|
379
|
+
level = os.environ.get("LINKEDIN_CLI_LOG", "INFO").upper()
|
|
380
|
+
logging.basicConfig(level=level, stream=sys.stderr,
|
|
381
|
+
format="%(asctime)s %(levelname)s %(name)s: %(message)s")
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def main(argv=None) -> int:
    """CLI entry point: parse *argv*, configure logging and dispatch.

    ``session open`` / ``session close`` go to their lifecycle commands; every
    other command is run as a verb (its name is copied into ``args.verb``).

    Returns:
        The exit code produced by the dispatched command.
    """
    args = build_parser().parse_args(argv)
    _configure_logging()

    if args.cmd != "session":
        args.verb = args.cmd
        return _run_verb(args)

    if args.subcmd == "open":
        return _cmd_session_open(args)
    return _cmd_session_close(args)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
if __name__ == "__main__":
|
|
396
|
+
raise SystemExit(main())
|
linkedin_cli/conf.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Platform constants for the LinkedIn interaction layer.
|
|
2
|
+
|
|
3
|
+
Browser timing/launch knobs and fixture paths — no campaign, CRM, or
|
|
4
|
+
scheduling config (that stays in OpenOutreach's ``linkedin/conf.py``).
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# Directory that contains the linkedin_cli package (the parent of this file's
# own directory). The fixture paths below are resolved relative to it.
ROOT_DIR = Path(__file__).parent.parent

# ----------------------------------------------------------------------
# Fixture paths (saved HTML pages + profile JSON for tests/fixture capture)
# ----------------------------------------------------------------------
FIXTURE_DIR = ROOT_DIR / "tests" / "fixtures"
FIXTURE_PROFILES_DIR = FIXTURE_DIR / "profiles"
FIXTURE_PAGES_DIR = FIXTURE_DIR / "pages"
# When False, browser.nav.dump_page_html() returns without writing anything.
DUMP_PAGES = False

# ----------------------------------------------------------------------
# Browser config
# ----------------------------------------------------------------------
# NOTE(review): browser.login.launch_browser() passes headless=False literally
# and does not read this flag — confirm whether another module uses it.
BROWSER_HEADLESS = False  # LinkedIn runs headed (under Xvfb in Docker)
BROWSER_SLOW_MO = 200  # passed as slow_mo to chromium.launch()
BROWSER_DEFAULT_TIMEOUT_MS = 30_000  # context default + navigation timeout
BROWSER_LOGIN_TIMEOUT_MS = 40_000  # load-state wait after submitting the login form
BROWSER_NAV_TIMEOUT_MS = 10_000  # default URL wait in browser.nav.goto_page()
# Inclusive bounds (ms) for the random typing delay drawn by human_type().
HUMAN_TYPE_MIN_DELAY_MS = 50
HUMAN_TYPE_MAX_DELAY_MS = 200

# Seconds to wait for the user to clear a LinkedIn security checkpoint in the
# live browser (noVNC http://localhost:6080/vnc.html) before the daemon exits.
CHECKPOINT_RESOLVE_TIMEOUT_S = 1800
|