linkedin-agent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkedin_agent_cli-0.1.0.dist-info/METADATA +197 -0
- linkedin_agent_cli-0.1.0.dist-info/RECORD +34 -0
- linkedin_agent_cli-0.1.0.dist-info/WHEEL +4 -0
- linkedin_agent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- linkedin_agent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- linkedin_cli/__init__.py +9 -0
- linkedin_cli/actions/__init__.py +0 -0
- linkedin_cli/actions/connect.py +118 -0
- linkedin_cli/actions/conversations.py +132 -0
- linkedin_cli/actions/message.py +153 -0
- linkedin_cli/actions/profile.py +22 -0
- linkedin_cli/actions/search.py +186 -0
- linkedin_cli/actions/status.py +112 -0
- linkedin_cli/api/__init__.py +0 -0
- linkedin_cli/api/client.py +182 -0
- linkedin_cli/api/messaging/__init__.py +11 -0
- linkedin_cli/api/messaging/conversations.py +56 -0
- linkedin_cli/api/messaging/send.py +74 -0
- linkedin_cli/api/messaging/utils.py +24 -0
- linkedin_cli/api/voyager.py +319 -0
- linkedin_cli/auth.py +98 -0
- linkedin_cli/browser/__init__.py +0 -0
- linkedin_cli/browser/login.py +140 -0
- linkedin_cli/browser/nav.py +115 -0
- linkedin_cli/cli.py +396 -0
- linkedin_cli/conf.py +33 -0
- linkedin_cli/enums.py +11 -0
- linkedin_cli/exceptions.py +47 -0
- linkedin_cli/launcher.py +60 -0
- linkedin_cli/page_state.py +148 -0
- linkedin_cli/session.py +169 -0
- linkedin_cli/setup/__init__.py +0 -0
- linkedin_cli/setup/self_profile.py +25 -0
- linkedin_cli/url_utils.py +30 -0
linkedin_cli/exceptions.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
class AuthenticationError(Exception):
    """Signals an authentication failure (HTTP 401 Unauthorized)."""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TerminalStateError(Exception):
    """The profile is already done or dead, so the caller has to skip it."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SkipProfile(Exception):
    """Signals that the current profile has to be skipped."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ProfileInaccessibleError(Exception):
    """The profile cannot be read: private, deleted, or restricted (HTTP 403/404)."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ReachedConnectionLimit(Exception):
    """The weekly connection limit has been reached."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class IllegalPageTransition(Exception):
    """Signals a breach of an action's ``@transition`` contract.

    The ``transition`` decorator in ``page_state`` raises this when the live
    page is not in the action's declared ``when`` state before the action runs,
    or is not in one of its ``then`` states afterwards. Typical example:
    credentials are submitted and the browser ends up on the login page again
    (credentials rejected) rather than on the feed or a checkpoint.
    """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CheckpointChallengeError(Exception):
    """The account hit a LinkedIn security checkpoint.

    The challenge URL is kept on the exception (``url``) so the user can be
    told where to go to clear it. It is raised from the login flow; on seeing
    it the daemon must NOT call reauthenticate(), because that only hardens
    the block.
    """

    def __init__(self, url: str):
        message = f"LinkedIn checkpoint challenge: {url}"
        super().__init__(message)
        # Exposed for callers that want to show or open the challenge page.
        self.url = url
|
|
47
|
+
|
linkedin_cli/launcher.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Launch the persistent, bound LinkedIn browser that verb processes connect to.
|
|
2
|
+
|
|
3
|
+
This is the session *owner*: it launches a persistent browser (auth/cookies live
|
|
4
|
+
in its on-disk profile), `browser.bind()`s it to a websocket, records the endpoint
|
|
5
|
+
in the session registry, and stays alive. Verb processes attach as clients via
|
|
6
|
+
``PlaywrightCliSession``; `playwright-cli attach <name>` can attach too (e.g. for a
|
|
7
|
+
human to clear a checkpoint in the live browser).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import signal
|
|
14
|
+
|
|
15
|
+
from playwright.sync_api import sync_playwright
|
|
16
|
+
from playwright_stealth import Stealth
|
|
17
|
+
|
|
18
|
+
from linkedin_cli.conf import BROWSER_DEFAULT_TIMEOUT_MS, BROWSER_HEADLESS, BROWSER_SLOW_MO
|
|
19
|
+
from linkedin_cli.session import clear_session, write_session
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
LINKEDIN_FEED_URL = "https://www.linkedin.com/feed/"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def open_bound_session(name: str, *, profile_dir: str,
                       host: str = "127.0.0.1", port: int = 0) -> None:
    """Own a named browser session: launch, bind, register, then block.

    Steps, in order: create ``profile_dir`` if needed, launch a persistent
    Chromium context on it (headless/slow-mo/timeouts taken from ``conf``),
    apply stealth, bind the browser under ``name`` on ``host``/``port``, open
    the LinkedIn feed, record the endpoint plus this process's pid in the
    session registry, and print the endpoint to stdout. The call then blocks
    in ``signal.pause()``; on the way out the registry entry is removed and
    the context is closed.

    The defaults (``127.0.0.1`` and port ``0``) keep the endpoint on localhost.

    NOTE(review): ``KeyboardInterrupt``/``SystemExit`` are the only exits
    handled here. A SIGTERM reaches the cleanup only if some handler turns it
    into ``SystemExit`` — presumably installed by the caller; confirm.
    """
    os.makedirs(profile_dir, exist_ok=True)

    with sync_playwright() as playwright:
        launch_options = {"headless": BROWSER_HEADLESS, "slow_mo": BROWSER_SLOW_MO}
        browser_context = playwright.chromium.launch_persistent_context(
            profile_dir, **launch_options
        )
        browser_context.set_default_timeout(BROWSER_DEFAULT_TIMEOUT_MS)
        browser_context.set_default_navigation_timeout(BROWSER_DEFAULT_TIMEOUT_MS)
        Stealth().apply_stealth_sync(browser_context)

        binding = browser_context.browser.bind(name, host=host, port=port)
        ws_endpoint = binding["endpoint"]

        # Reuse the first existing page if the context has one, else open one.
        if browser_context.pages:
            feed_page = browser_context.pages[0]
        else:
            feed_page = browser_context.new_page()
        feed_page.goto(LINKEDIN_FEED_URL)

        # Register only after the browser is bound and the feed was requested.
        write_session(name, ws_endpoint, os.getpid())
        logger.info("Session %r bound at %s (profile=%s)", name, ws_endpoint, profile_dir)
        print(ws_endpoint, flush=True)

        try:
            signal.pause()  # block until a termination signal
        except (KeyboardInterrupt, SystemExit):
            pass
        finally:
            clear_session(name)
            browser_context.close()
            logger.info("Session %r closed", name)
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Classify the live LinkedIn page into a :class:`PageState`.
|
|
2
|
+
|
|
3
|
+
The browser is the source of truth: LinkedIn can bounce us to a login, an
|
|
4
|
+
authwall, or a checkpoint at any moment, so control loops re-read the page
|
|
5
|
+
rather than trust a remembered state. This module is that single, pure
|
|
6
|
+
classifier. It reads only the URL *path* — never the query string, whose
|
|
7
|
+
``?session_redirect=…%2Ffeed%2F`` once fooled a whole-URL substring check into
|
|
8
|
+
thinking an unauthenticated login page was the feed.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import functools
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from urllib.parse import urlsplit
|
|
15
|
+
|
|
16
|
+
from playwright.sync_api import Page
|
|
17
|
+
|
|
18
|
+
from linkedin_cli.exceptions import IllegalPageTransition
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PageState(str, Enum):
    """The kind of page the browser is on; values match the auth machine's state ids."""

    # Pages that interrupt or gate an authenticated session.
    CHECKPOINT = "checkpoint"
    LOGIN = "login"
    AUTHWALL = "authwall"

    # Regular in-app pages.
    FEED = "feed"
    PROFILE = "profile"
    MESSAGING = "messaging"

    # Error page and the fallback for unrecognised paths.
    NOT_FOUND = "not_found"
    UNKNOWN = "unknown"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Ordered (path prefix, state) pairs; lookups take the first prefix that
# matches. Checkpoint leads the list because it can surface under any flow and
# must win over whatever path it decorates.
_ROUTES: list[tuple[str, PageState]] = [
    ("/checkpoint", PageState.CHECKPOINT),
    ("/login", PageState.LOGIN),
    ("/authwall", PageState.AUTHWALL),
    ("/feed", PageState.FEED),
    ("/in/", PageState.PROFILE),
    ("/messaging", PageState.MESSAGING),
    ("/404", PageState.NOT_FOUND),
]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def classify_page(page: Page) -> PageState:
    """Map the live page to a :class:`PageState` using only its URL path.

    The first ``_ROUTES`` entry whose prefix starts the path decides the
    state; a path matching no prefix is :attr:`PageState.UNKNOWN`. The query
    string is never inspected.
    """
    url_path = urlsplit(page.url).path
    matching_states = (
        state for prefix, state in _ROUTES if url_path.startswith(prefix)
    )
    return next(matching_states, PageState.UNKNOWN)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def transition(*, when: PageState, then: PageState | set[PageState]):
    """Attach a page-state contract to the action that performs a transition.

    The decorated action receives a session (any object exposing a live
    ``page``) as its first argument. Each call is checked against the *live*
    page, classified with :func:`classify_page`:

    - before the action, the page must be in ``when``;
    - after the action, the page must be in ``then`` (one state or a set).

    A failed check raises :class:`IllegalPageTransition`. Because the page is
    classified again once the action has run, the destination is observed
    rather than assumed, and may legitimately be any of several states
    (login → feed *or* checkpoint). The wrapper returns the observed state;
    the action's own return value is discarded.

    The contract stays introspectable on the wrapper as ``fn.when`` and
    ``fn.then`` (a frozenset), so a driver can build its dispatch table from
    the decorated actions.
    """
    if isinstance(then, PageState):
        allowed = frozenset([then])
    else:
        allowed = frozenset(then)

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(session, *args, **kwargs) -> PageState:
            # Precondition: check the live page before touching the browser.
            observed = classify_page(session.page)
            if observed is not when:
                raise IllegalPageTransition(
                    f"{fn.__name__}() requires page state {when.value!r}, "
                    f"but page is {observed.value!r} ({session.page.url})"
                )

            fn(session, *args, **kwargs)

            # Postcondition: classify the page again now the action is done.
            landed = classify_page(session.page)
            if landed in allowed:
                return landed
            expected = sorted(t.value for t in allowed)
            raise IllegalPageTransition(
                f"{fn.__name__}() from {when.value!r} produced {landed.value!r}; "
                f"expected one of {expected} ({session.page.url})"
            )

        wrapper.when = when
        wrapper.then = allowed
        return wrapper

    return decorator
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class PageFlow:
    """A page-state flow: a set of ``@transition`` actions plus one generic driver.

    A flow is declared with a goal state; its transitions are attached as
    decorated actions, each registered under its precondition (``when``).
    :meth:`run` is the single observe→act loop shared by every flow: classify
    the live page, dispatch to the action registered for that state, repeat
    until the goal is observed.
    """

    def __init__(self, name: str, *, goal: PageState):
        self.name = name
        self.goal = goal
        # One contract-wrapped action per source state.
        self._actions: dict[PageState, object] = {}

    def transition(self, *, when: PageState, then: PageState | set[PageState]):
        """Decorator: wrap the action in its contract and register it under ``when``.

        The contract is enforced by the module-level :func:`transition`.
        Raises ``ValueError`` at decoration time if this flow already has an
        action registered for ``when``.
        """
        contract = transition(when=when, then=then)  # the module-level contract decorator

        def register(fn):
            if when in self._actions:
                raise ValueError(
                    f"{self.name!r} flow already has a transition from {when.value!r}"
                )
            wrapped = contract(fn)
            self._actions[when] = wrapped
            return wrapped

        return register

    def run(self, session, *, max_hops: int = 8) -> PageState:
        """Drive *session* to :attr:`goal` using at most ``max_hops`` actions.

        Returns the goal state once it is observed on the live page. Raises
        :class:`IllegalPageTransition` if the observed state has no registered
        action, or if the goal is still not reached after ``max_hops`` actions.
        """
        for _ in range(max_hops):
            state = classify_page(session.page)
            if state is self.goal:
                return state
            action = self._actions.get(state)
            if action is None:
                raise IllegalPageTransition(
                    f"{self.name!r} flow: no transition from {state.value!r} "
                    f"({session.page.url})"
                )
            action(session)

        # The last permitted hop may itself have landed on the goal, so look
        # at the page once more before declaring the flow stuck.
        state = classify_page(session.page)
        if state is self.goal:
            return state
        raise IllegalPageTransition(
            f"{self.name!r} flow: did not reach {self.goal.value!r} within "
            f"{max_hops} hops (stuck at {state.value!r})"
        )
|
linkedin_cli/session.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""The session contract every linkedin_cli verb runs against.
|
|
2
|
+
|
|
3
|
+
linkedin_cli owns no browser lifecycle and no persistence. Each verb is handed
|
|
4
|
+
a *session* — an object that exposes a live Playwright page/context plus a few
|
|
5
|
+
lifecycle hooks — and drives LinkedIn through it. The concrete session is the
|
|
6
|
+
caller's job: OpenOutreach's daemon backs it with its Django ``AccountSession``;
|
|
7
|
+
the standalone CLI backs it with a Playwright CLI session adapter.
|
|
8
|
+
|
|
9
|
+
``LinkedInSession`` is the typed boundary between the two — it lists exactly what
|
|
10
|
+
the platform code touches, and nothing about campaigns, leads, or the DB.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import os
|
|
17
|
+
import random
|
|
18
|
+
import time
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Protocol, runtime_checkable
|
|
21
|
+
|
|
22
|
+
from playwright.sync_api import BrowserContext, Page, sync_playwright
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@runtime_checkable
class LinkedInSession(Protocol):
    """Structural type of the browser session every linkedin_cli verb receives.

    Browser launch, the persistent profile, auth/cookies and fingerprint are
    the implementation's business and are deliberately absent from this
    contract. Verbs restrict themselves to reading ``page``/``context``,
    looking up their own identity through ``self_profile``, and calling the
    lifecycle hooks declared here.
    """

    #: The live Playwright page of the authenticated session.
    page: Page
    #: The browser context that owns ``page`` (cookies, response listeners, storage).
    context: BrowserContext

    @property
    def self_profile(self) -> dict:
        """Profile dict of the logged-in member (the messaging mailbox).

        Resolved once and then kept for the lifetime of the session; it
        carries at least ``urn``, ``first_name``, ``last_name``.
        """
        ...

    def ensure_browser(self) -> None:
        """Make ``page`` usable by launching or recovering the browser. Idempotent."""
        ...

    def wait(self, min_delay: float = ..., max_delay: float = ...) -> None:
        """Pause at a human pace, then block until the page is DOM-ready."""
        ...

    def close(self) -> None:
        """Free the browser resources this session holds."""
        ...
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ── Session registry ──────────────────────────────────────────────
|
|
65
|
+
#
|
|
66
|
+
# The launcher (``linkedin-cli session open``) owns the bound browser and
|
|
67
|
+
# records its websocket endpoint here; verb processes look it up by name. This
|
|
68
|
+
# is the only on-disk state linkedin_cli keeps — a pointer to a running browser,
|
|
69
|
+
# not auth/cookies (those live in the launcher's persistent profile).
|
|
70
|
+
|
|
71
|
+
def linkedin_cli_home() -> Path:
    """Return the root directory of linkedin-cli's on-disk state.

    A non-empty ``$LINKEDIN_CLI_HOME`` wins; otherwise the directory is
    ``~/.linkedin-cli``.
    """
    configured = os.environ.get("LINKEDIN_CLI_HOME")
    if configured:
        return Path(configured)
    return Path.home() / ".linkedin-cli"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _sessions_dir() -> Path:
    """Return the ``sessions`` directory under the linkedin-cli home."""
    home = linkedin_cli_home()
    return home.joinpath("sessions")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _session_file(name: str) -> Path:
    """Return the registry file path (``<name>.json``) for session *name*."""
    registry_dir = _sessions_dir()
    filename = f"{name}.json"
    return registry_dir / filename
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def write_session(name: str, endpoint: str, pid: int) -> Path:
    """Store a bound browser's endpoint and launcher pid under *name*.

    The JSON record is written to a sibling ``.json.tmp`` file first and then
    moved over the registry file, so the registry file is swapped in one step
    rather than written in place. Returns the registry file path.
    """
    target = _session_file(name)
    target.parent.mkdir(parents=True, exist_ok=True)

    record = {"name": name, "endpoint": endpoint, "pid": pid}
    staging = target.with_suffix(".json.tmp")
    staging.write_text(json.dumps(record))
    staging.replace(target)
    return target
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def read_session(name: str) -> dict | None:
    """Return the recorded ``{name, endpoint, pid}`` for *name*, or None.

    None means there is no registry file for *name*. The file is read
    directly instead of being probed with ``exists()`` first, so an entry
    removed between the check and the read cannot surface as an error.
    """
    try:
        raw = _session_file(name).read_text()
    except (FileNotFoundError, NotADirectoryError):
        # No entry (or no usable sessions directory): same as "not registered".
        return None
    return json.loads(raw)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def clear_session(name: str) -> None:
    """Delete the registry file for *name*; a missing file is not an error."""
    registry_entry = _session_file(name)
    try:
        registry_entry.unlink()
    except FileNotFoundError:
        # Already gone — nothing to deregister.
        pass
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# ── Playwright-CLI-backed session (connect to a bound browser) ─────
|
|
108
|
+
|
|
109
|
+
class PlaywrightCliSession:
    """A `LinkedInSession` that drives a launcher-owned bound browser over `connect`.

    The launcher (`linkedin-cli session open`) launches the persistent browser
    and `browser.bind()`s it; this class attaches with
    `chromium.connect(endpoint)` and exposes the resulting `page`/`context`.
    It owns no browser lifecycle and no persistence — `close()` only
    disconnects this client; the launcher's browser keeps running.

    Pacing (``min_pace``/``max_pace``) is injected by the caller (the CLI), not
    read from config here.
    """

    def __init__(self, endpoint: str, *, min_pace: float, max_pace: float,
                 username: str | None = None, password: str | None = None,
                 name: str | None = None):
        self.endpoint = endpoint
        self.min_pace = min_pace
        self.max_pace = max_pace
        self.username = username
        self.password = password
        self.name = name
        # Connection state; populated by ensure_browser(), reset by close().
        self.page = None
        self.context = None
        self._playwright = None
        self._browser = None
        # Cache for the self_profile property.
        self._self_profile = None

    def ensure_browser(self) -> None:
        """Connect to the bound browser unless a usable page is already held.

        Picks the first existing context and page of the connected browser,
        creating either one when there is none. Idempotent while the page is
        open.
        """
        if self.page is not None and not self.page.is_closed():
            return

        # Reconnecting: release the stale connection first so the previous
        # Playwright driver is not leaked when a new one is started.
        if self._browser is not None or self._playwright is not None:
            try:
                self.close()
            except Exception:
                # Best effort: a dead connection must not block the reconnect.
                logger.debug("Ignoring error while dropping stale connection",
                             exc_info=True)

        playwright = sync_playwright().start()
        try:
            browser = playwright.chromium.connect(self.endpoint)
        except Exception:
            # Do not leave a running driver behind when the connect fails.
            playwright.stop()
            raise

        self._playwright = playwright
        self._browser = browser
        self.context = browser.contexts[0] if browser.contexts else browser.new_context()
        self.page = self.context.pages[0] if self.context.pages else self.context.new_page()
        logger.debug("Connected to bound browser at %s", self.endpoint)

    @property
    def self_profile(self) -> dict:
        """The logged-in member's profile dict, discovered on first access and cached."""
        if self._self_profile is None:
            # Imported here rather than at module level — presumably to avoid
            # an import cycle; confirm.
            from linkedin_cli.setup.self_profile import discover_self_profile
            self._self_profile = discover_self_profile(self)
        return self._self_profile

    def wait(self, min_delay: float | None = None, max_delay: float | None = None) -> None:
        """Sleep for a random time in ``[min_delay, max_delay]``, then wait for DOM-ready.

        A bound left as ``None`` falls back to ``min_pace``/``max_pace``. An
        explicit ``0`` is honoured (it is a valid "no delay" request, not a
        request for the default). The DOM-ready wait is skipped when no page
        is attached.
        """
        low = self.min_pace if min_delay is None else min_delay
        high = self.max_pace if max_delay is None else max_delay
        time.sleep(random.uniform(low, high))
        if self.page is not None:
            self.page.wait_for_load_state("domcontentloaded")

    def close(self) -> None:
        """Disconnect this client and stop its Playwright driver.

        The launcher owns the browser/profile, so only this client's handles
        are released. The driver is stopped and the state reset even if
        closing the browser connection raises.
        """
        try:
            if self._browser:
                self._browser.close()
        finally:
            try:
                if self._playwright:
                    self._playwright.stop()
            finally:
                self.page = self.context = self._browser = self._playwright = None

    def __repr__(self) -> str:
        return f"linkedin-cli-session:{self.name or self.endpoint}"
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Discover the logged-in member's own LinkedIn profile (the messaging mailbox)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
from linkedin_cli.api.client import PlaywrightLinkedinAPI
|
|
7
|
+
from linkedin_cli.exceptions import AuthenticationError
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def discover_self_profile(session) -> dict:
    """Fetch the logged-in member's own profile through Voyager (``me``).

    Ensures the session's browser is up, asks ``PlaywrightLinkedinAPI`` for
    the profile with public identifier ``"me"``, and returns the parsed
    profile dict without persisting anything. The ``public_identifier`` key
    of the result is logged.

    Raises ``AuthenticationError`` when the API yields no profile.
    """
    session.ensure_browser()
    client = PlaywrightLinkedinAPI(session=session)
    own_profile, _unused_raw = client.get_profile(public_identifier="me")

    if own_profile:
        logger.info("Self-profile discovered: %s", own_profile.get("public_identifier"))
        return own_profile
    raise AuthenticationError("Could not fetch own profile via Voyager API")
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from urllib.parse import quote, urlparse, unquote
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def url_to_public_id(url: str) -> Optional[str]:
    """Extract the public identifier from a LinkedIn profile URL.

    Only the URL path is examined, and it must start with ``/in/``. The
    segment after ``in`` is returned percent-decoded. ``None`` is returned
    for an empty URL, a non-profile path, or an empty identifier segment
    (e.g. ``/in//foo``).
    """
    if not url:
        return None

    segments = urlparse(url.strip()).path.strip("/").split("/")
    if len(segments) < 2 or segments[0] != "in":
        return None

    # A doubled slash yields an empty second segment; that is not an id.
    return unquote(segments[1]) or None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def public_id_to_url(public_id: str) -> str:
    """Build the canonical profile URL for *public_id*.

    Surrounding slashes are dropped and the identifier is percent-encoded
    with no characters left as safe. An empty identifier yields ``""``.
    """
    if public_id:
        slug = quote(public_id.strip("/"), safe="")
        return f"https://www.linkedin.com/in/{slug}/"
    return ""
|