linkedin-agent-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
linkedin_cli/enums.py ADDED
@@ -0,0 +1,11 @@
1
+ from enum import StrEnum
2
+
3
+
4
+ class ProfileState(StrEnum):
5
+ QUALIFIED = "Qualified"
6
+ READY_TO_CONNECT = "Ready to Connect"
7
+ PENDING = "Pending"
8
+ CONNECTED = "Connected"
9
+ COMPLETED = "Completed"
10
+ FAILED = "Failed"
11
+
@@ -0,0 +1,47 @@
1
class AuthenticationError(Exception):
    """Raised for 401 Unauthorized errors."""
4
+
5
+
6
class TerminalStateError(Exception):
    """Raised when a profile is already done or dead; the caller must skip it."""
9
+
10
+
11
class SkipProfile(Exception):
    """Signals that the current profile must be skipped."""
14
+
15
+
16
class ProfileInaccessibleError(Exception):
    """Raised when a profile is private, deleted, or restricted (HTTP 403/404)."""
19
+
20
+
21
class ReachedConnectionLimit(Exception):
    """Raised when the weekly connection limit has been reached."""
24
+
25
+
26
class IllegalPageTransition(Exception):
    """An action started from, or ended on, a page state its contract forbids.

    The ``transition`` decorator (see ``page_state``) raises this when the live
    page breaks the action's declared precondition (``when``) or postcondition
    (``then``). Example: credentials are submitted and the browser lands back
    on the login page (rejected creds) rather than on the feed or a checkpoint.
    """
35
+
36
+
37
class CheckpointChallengeError(Exception):
    """LinkedIn flagged the account with a security checkpoint.

    Carries the challenge URL so the user knows where to go to clear it.
    Raised from the login flow; the daemon must NOT call reauthenticate()
    when it sees this — that just hardens the block.

    Attributes:
        url: The challenge URL passed to the constructor.
    """

    def __init__(self, url: str):
        self.url = url
        super().__init__(f"LinkedIn checkpoint challenge: {url}")

    def __reduce__(self):
        # The inherited reduce replays ``self.args`` (the already formatted
        # message) into ``__init__``, which would prefix the message a second
        # time on copy/pickle. Rebuild from the raw URL instead, and carry the
        # instance dict along so any extra attributes survive the round-trip.
        return type(self), (self.url,), dict(vars(self))
47
+
@@ -0,0 +1,60 @@
1
+ """Launch the persistent, bound LinkedIn browser that verb processes connect to.
2
+
3
+ This is the session *owner*: it launches a persistent browser (auth/cookies live
4
+ in its on-disk profile), `browser.bind()`s it to a websocket, records the endpoint
5
+ in the session registry, and stays alive. Verb processes attach as clients via
6
+ ``PlaywrightCliSession``; `playwright-cli attach <name>` can attach too (e.g. for a
7
+ human to clear a checkpoint in the live browser).
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import os
13
+ import signal
14
+
15
+ from playwright.sync_api import sync_playwright
16
+ from playwright_stealth import Stealth
17
+
18
+ from linkedin_cli.conf import BROWSER_DEFAULT_TIMEOUT_MS, BROWSER_HEADLESS, BROWSER_SLOW_MO
19
+ from linkedin_cli.session import clear_session, write_session
20
+
21
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Page the launcher navigates to once the browser has been bound.
LINKEDIN_FEED_URL = "https://www.linkedin.com/feed/"
24
+
25
+
26
def open_bound_session(name: str, *, profile_dir: str,
                       host: str = "127.0.0.1", port: int = 0) -> None:
    """Launch a persistent browser, bind it, register the endpoint, and block.

    Runs until interrupted (SIGINT/SIGTERM), then deregisters and closes the
    browser. The websocket endpoint is also printed to stdout for convenience.
    Browser-launch knobs (headed/slow-mo/timeouts) come from ``conf``; ``host``/
    ``port`` default to a localhost OS-picked port (right for many sessions in
    one container — no cross-container exposure needed).

    Args:
        name: Session name; used for the bind and as the registry key.
        profile_dir: Directory of the persistent browser profile (created if
            missing).
        host: Interface the bound endpoint listens on.
        port: Port for the bound endpoint; ``0`` lets the OS pick one.

    Any error raised while starting up propagates to the caller, but only
    after the browser context has been closed and the registry entry (if it
    was already written) has been removed.
    """

    def _exit_on_sigterm(signum, frame):
        # SIGTERM's default action terminates the process without unwinding the
        # stack, so the cleanup in ``finally`` would never run. Raising
        # SystemExit turns it into an ordinary unwind.
        raise SystemExit(0)

    os.makedirs(profile_dir, exist_ok=True)
    with sync_playwright() as pw:
        context = pw.chromium.launch_persistent_context(
            profile_dir, headless=BROWSER_HEADLESS, slow_mo=BROWSER_SLOW_MO,
        )
        handler_installed = False
        previous_handler = None
        registered = False
        try:
            try:
                previous_handler = signal.signal(signal.SIGTERM, _exit_on_sigterm)
                handler_installed = True
            except ValueError:
                # signal.signal() is only allowed in the main thread; elsewhere
                # fall back to the process's existing SIGTERM behaviour.
                logger.debug("Not in main thread; SIGTERM handler not installed")

            context.set_default_timeout(BROWSER_DEFAULT_TIMEOUT_MS)
            context.set_default_navigation_timeout(BROWSER_DEFAULT_TIMEOUT_MS)
            Stealth().apply_stealth_sync(context)

            endpoint = context.browser.bind(name, host=host, port=port)["endpoint"]
            page = context.pages[0] if context.pages else context.new_page()
            page.goto(LINKEDIN_FEED_URL)

            write_session(name, endpoint, os.getpid())
            registered = True
            logger.info("Session %r bound at %s (profile=%s)", name, endpoint, profile_dir)
            print(endpoint, flush=True)

            try:
                signal.pause()  # block until a termination signal
            except (KeyboardInterrupt, SystemExit):
                pass
        finally:
            if handler_installed:
                # ``previous_handler`` is None when the old handler was not
                # installed from Python; restore the default in that case.
                signal.signal(
                    signal.SIGTERM,
                    signal.SIG_DFL if previous_handler is None else previous_handler,
                )
            if registered:
                # Only remove the entry this call wrote, never someone else's.
                clear_session(name)
            context.close()
        logger.info("Session %r closed", name)
@@ -0,0 +1,148 @@
1
+ """Classify the live LinkedIn page into a :class:`PageState`.
2
+
3
+ The browser is the source of truth: LinkedIn can bounce us to a login, an
4
+ authwall, or a checkpoint at any moment, so control loops re-read the page
5
+ rather than trust a remembered state. This module is that single, pure
6
+ classifier. It reads only the URL *path* — never the query string, whose
7
+ ``?session_redirect=…%2Ffeed%2F`` once fooled a whole-URL substring check into
8
+ thinking an unauthenticated login page was the feed.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import functools
13
+ from enum import Enum
14
+ from urllib.parse import urlsplit
15
+
16
+ from playwright.sync_api import Page
17
+
18
+ from linkedin_cli.exceptions import IllegalPageTransition
19
+
20
+
21
class PageState(str, Enum):
    """The page the browser is currently on.

    Each value matches the corresponding state id of the auth machine. Members
    are ``str`` instances, so they compare equal to their value.
    """

    CHECKPOINT = "checkpoint"
    LOGIN = "login"
    AUTHWALL = "authwall"
    FEED = "feed"
    PROFILE = "profile"
    MESSAGING = "messaging"
    NOT_FOUND = "not_found"
    UNKNOWN = "unknown"
32
+
33
+
34
# Ordered (path prefix, state) pairs; the first prefix that matches decides the
# state. Checkpoint leads the table because it can surface under any flow and
# must take precedence over every other route.
_ROUTES: list[tuple[str, PageState]] = [
    ("/checkpoint", PageState.CHECKPOINT),
    ("/login", PageState.LOGIN),
    ("/authwall", PageState.AUTHWALL),
    ("/feed", PageState.FEED),
    ("/in/", PageState.PROFILE),
    ("/messaging", PageState.MESSAGING),
    ("/404", PageState.NOT_FOUND),
]
45
+
46
+
47
def classify_page(page: Page) -> PageState:
    """Return the :class:`PageState` of the live page, judged by URL path only.

    Routes are tried in ``_ROUTES`` order and the first match wins. A route
    matches on whole path segments: ``/feed`` matches ``/feed`` and
    ``/feed/...`` but not ``/feedback``, so an unrelated page that merely
    shares leading characters with a route is not misclassified. Routes that
    already end in ``/`` (``/in/``) are plain prefix matches. The query string
    and fragment are never consulted.

    Returns:
        The matching state, or :attr:`PageState.UNKNOWN` when no route matches.
    """
    path = urlsplit(page.url).path
    for prefix, state in _ROUTES:
        if prefix.endswith("/"):
            # The trailing slash already pins the match to a segment boundary.
            matched = path.startswith(prefix)
        else:
            matched = path == prefix or path.startswith(prefix + "/")
        if matched:
            return state
    return PageState.UNKNOWN
54
+
55
+
56
def transition(*, when: PageState, then: PageState | set[PageState]):
    """Attach a page-state contract to the action that performs a transition.

    The decorated action receives a session (any object exposing a live
    ``page``) and drives the browser. Around each call the wrapper checks the
    *live* page:

    - **precondition** — before the action runs, the page must be in ``when``;
    - **postcondition** — after it returns, the page must be in one of ``then``.

    A violation of either raises :class:`IllegalPageTransition`. The
    postcondition is checked by re-reading the page *after* the action, so the
    destination is observed rather than declared up front and may be one of
    several (login → feed *or* checkpoint). The wrapper returns the observed
    resulting state; the action's own return value is discarded.

    The contract is exposed on the wrapper as ``fn.when`` / ``fn.then`` (the
    latter always a ``frozenset``) so a driver can build its dispatch table
    from the decorated actions themselves.
    """
    if isinstance(then, PageState):
        allowed = frozenset((then,))
    else:
        allowed = frozenset(then)

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(session, *args, **kwargs) -> PageState:
            # Precondition: judged against the page as it is right now.
            start = classify_page(session.page)
            if start is not when:
                raise IllegalPageTransition(
                    f"{fn.__name__}() requires page state {when.value!r}, "
                    f"but page is {start.value!r} ({session.page.url})"
                )

            fn(session, *args, **kwargs)

            # Postcondition: re-read the page the action left behind.
            landed = classify_page(session.page)
            if landed in allowed:
                return landed
            expected = sorted(state.value for state in allowed)
            raise IllegalPageTransition(
                f"{fn.__name__}() from {when.value!r} produced {landed.value!r}; "
                f"expected one of {expected} ({session.page.url})"
            )

        wrapper.when = when
        wrapper.then = allowed
        return wrapper

    return decorator
99
+
100
+
101
class PageFlow:
    """A page-state flow: a set of ``@transition`` actions plus one generic driver.

    Declare a flow with a goal state, then attach its transitions as decorated
    actions — each registers under its precondition (``when``). :meth:`run` is the
    observe→act loop, written once for every flow: re-read the live page, dispatch
    to the action for that state, repeat until the goal. There is no per-flow loop
    and no hand-built dispatch table — a flow *is* its annotated transitions.
    """

    def __init__(self, name: str, *, goal: PageState):
        self.name = name
        self.goal = goal
        # Registered actions, keyed by the page state they start from.
        self._actions: dict[PageState, object] = {}

    def transition(self, *, when: PageState, then: PageState | set[PageState]):
        """Decorator: enforce the action's contract (via :func:`transition`) and
        register it under ``when`` so :meth:`run` can dispatch to it.

        Raises:
            ValueError: at decoration time, if the flow already has an action
                registered for ``when``.
        """
        contract = transition(when=when, then=then)  # the module-level contract decorator

        def register(fn):
            if when in self._actions:
                raise ValueError(
                    f"{self.name!r} flow already has a transition from {when.value!r}"
                )
            self._actions[when] = contract(fn)
            return self._actions[when]

        return register

    def run(self, session, *, max_hops: int = 8) -> PageState:
        """Drive *session* to :attr:`goal` using at most ``max_hops`` actions.

        Before each hop the live page is re-classified; if it is the goal the
        state is returned, otherwise the action registered for that state is
        invoked. The page is read once more after the final permitted hop, so
        a flow that reaches the goal on exactly its last hop succeeds.

        Raises:
            IllegalPageTransition: if the current page has no registered
                action, if an action violates its own contract, or if the goal
                is still not reached after ``max_hops`` actions.
        """
        for _ in range(max_hops):
            state = classify_page(session.page)
            if state is self.goal:
                return state
            action = self._actions.get(state)
            if action is None:
                raise IllegalPageTransition(
                    f"{self.name!r} flow: no transition from {state.value!r} "
                    f"({session.page.url})"
                )
            action(session)

        # The loop only checks the goal *before* acting, so the outcome of the
        # last hop has not been observed yet.
        final = classify_page(session.page)
        if final is self.goal:
            return final
        raise IllegalPageTransition(
            f"{self.name!r} flow: did not reach {self.goal.value!r} within "
            f"{max_hops} hops (stuck at {final.value!r})"
        )
@@ -0,0 +1,169 @@
1
+ """The session contract every linkedin_cli verb runs against.
2
+
3
+ linkedin_cli owns no browser lifecycle and no persistence. Each verb is handed
4
+ a *session* — an object that exposes a live Playwright page/context plus a few
5
+ lifecycle hooks — and drives LinkedIn through it. The concrete session is the
6
+ caller's job: OpenOutreach's daemon backs it with its Django ``AccountSession``;
7
+ the standalone CLI backs it with a Playwright CLI session adapter.
8
+
9
+ ``LinkedInSession`` is the typed boundary between the two — it lists exactly what
10
+ the platform code touches, and nothing about campaigns, leads, or the DB.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ import os
17
+ import random
18
+ import time
19
+ from pathlib import Path
20
+ from typing import Protocol, runtime_checkable
21
+
22
+ from playwright.sync_api import BrowserContext, Page, sync_playwright
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
@runtime_checkable
class LinkedInSession(Protocol):
    """Structural type of the browser session a linkedin_cli verb attaches to.

    Browser launch, the persistent profile, auth/cookies, and fingerprint are
    the implementation's concern and do not live here. Verbs only read
    ``page``/``context``, resolve their own identity through ``self_profile``,
    and call the lifecycle hooks declared below.
    """

    #: Live Playwright page for the authenticated session.
    page: Page
    #: Browser context owning the page (cookies, response listeners, storage).
    context: BrowserContext

    @property
    def self_profile(self) -> dict:
        """Profile dict of the logged-in member (the messaging mailbox).

        Resolved once and kept warm for the session; carries at least
        ``urn``, ``first_name``, ``last_name``.
        """
        ...

    def ensure_browser(self) -> None:
        """Launch or recover the browser so that ``page`` is usable. Idempotent."""
        ...

    def wait(self, min_delay: float = ..., max_delay: float = ...) -> None:
        """Pause at a human pace, then block until the page reaches DOM-ready."""
        ...

    def close(self) -> None:
        """Release the browser resources held by the session."""
        ...
62
+
63
+
64
+ # ── Session registry ──────────────────────────────────────────────
65
+ #
66
+ # The launcher (``linkedin-cli session open``) owns the bound browser and
67
+ # records its websocket endpoint here; verb processes look it up by name. This
68
+ # is the only on-disk state linkedin_cli keeps — a pointer to a running browser,
69
+ # not auth/cookies (those live in the launcher's persistent profile).
70
+
71
def linkedin_cli_home() -> Path:
    """Return the root directory for linkedin-cli's on-disk state.

    A non-empty ``$LINKEDIN_CLI_HOME`` overrides the location; otherwise it is
    ``.linkedin-cli`` under the user's home directory.
    """
    override = os.environ.get("LINKEDIN_CLI_HOME")
    if override:
        return Path(override)
    return Path.home() / ".linkedin-cli"
74
+
75
+
76
def _sessions_dir() -> Path:
    """Return the ``sessions`` directory under the linkedin-cli home."""
    return linkedin_cli_home().joinpath("sessions")
78
+
79
+
80
def _session_file(name: str) -> Path:
    """Return the path of the registry file for session *name* (``<name>.json``)."""
    filename = f"{name}.json"
    return _sessions_dir().joinpath(filename)
82
+
83
+
84
def write_session(name: str, endpoint: str, pid: int) -> Path:
    """Record a bound browser's endpoint and launcher pid under *name*.

    The JSON record is first written to a sibling ``.json.tmp`` file and then
    moved over the final path, so readers never observe a half-written entry.

    Returns:
        The path of the registry file.
    """
    target = _session_file(name)
    target.parent.mkdir(parents=True, exist_ok=True)

    record = {"name": name, "endpoint": endpoint, "pid": pid}
    staging = target.with_suffix(".json.tmp")
    staging.write_text(json.dumps(record))
    staging.replace(target)
    return target
92
+
93
+
94
def read_session(name: str) -> dict | None:
    """Return the recorded ``{name, endpoint, pid}`` for *name*, or None.

    The file is read directly rather than checked for existence first: the
    launcher may remove the entry at any moment, and a check-then-read would
    turn that race into a ``FileNotFoundError`` instead of the documented
    ``None``.

    Raises:
        json.JSONDecodeError: if the registry file exists but is not valid JSON.
    """
    try:
        raw = _session_file(name).read_text()
    except (FileNotFoundError, NotADirectoryError):
        # Same cases ``Path.exists()`` reports as "not there".
        return None
    return json.loads(raw)
100
+
101
+
102
def clear_session(name: str) -> None:
    """Delete the registry entry for *name*; a missing entry is not an error."""
    entry = _session_file(name)
    entry.unlink(missing_ok=True)
105
+
106
+
107
+ # ── Playwright-CLI-backed session (connect to a bound browser) ─────
108
+
109
class PlaywrightCliSession:
    """A `LinkedInSession` that drives a launcher-owned bound browser over `connect`.

    The launcher (`linkedin-cli session open`) launches the persistent browser
    and `browser.bind()`s it; this attaches with `chromium.connect(endpoint)`,
    yielding a real `page`/`context` shared with the launcher (and with any
    `playwright-cli attach`). It owns no browser lifecycle and no persistence —
    `close()` only disconnects this client; the launcher's browser keeps running.

    Pacing (``min_pace``/``max_pace``) is injected by the caller (the CLI), not
    read from config here.
    """

    def __init__(self, endpoint: str, *, min_pace: float, max_pace: float,
                 username: str | None = None, password: str | None = None,
                 name: str | None = None):
        self.endpoint = endpoint
        self.min_pace = min_pace
        self.max_pace = max_pace
        self.username = username
        self.password = password
        self.name = name
        # Connection state; populated by ensure_browser(), reset by close().
        self.page = None
        self.context = None
        self._playwright = None
        self._browser = None
        # Cache for the self_profile property.
        self._self_profile = None

    def ensure_browser(self) -> None:
        """Connect to the bound browser unless a usable page is already held.

        Picks the browser's first context and that context's first page,
        creating either when none exists. Idempotent while the page is open.
        """
        if self.page is not None and not self.page.is_closed():
            return

        # Reconnecting: stop the previous driver first. Starting a second
        # Playwright instance while the old one is still running would leak
        # it (and the sync API may refuse to start twice in one thread).
        if self._playwright is not None or self._browser is not None:
            try:
                self.close()
            except Exception:
                logger.debug("Ignoring error while dropping stale connection",
                             exc_info=True)

        playwright = sync_playwright().start()
        try:
            browser = playwright.chromium.connect(self.endpoint)
            context = browser.contexts[0] if browser.contexts else browser.new_context()
            page = context.pages[0] if context.pages else context.new_page()
        except BaseException:
            # Do not leave a driver running when the connect fails.
            playwright.stop()
            raise

        self._playwright = playwright
        self._browser = browser
        self.context = context
        self.page = page
        logger.debug("Connected to bound browser at %s", self.endpoint)

    @property
    def self_profile(self) -> dict:
        """The logged-in member's profile, discovered on first access and cached."""
        if self._self_profile is None:
            # Imported lazily, as in the original code.
            from linkedin_cli.setup.self_profile import discover_self_profile
            self._self_profile = discover_self_profile(self)
        return self._self_profile

    def wait(self, min_delay: float | None = None, max_delay: float | None = None) -> None:
        """Sleep a random interval, then wait for the page to reach DOM-ready.

        ``None`` for either bound falls back to the session's ``min_pace`` /
        ``max_pace``. An explicit ``0`` is honoured as "no delay" rather than
        being treated as missing.
        """
        low = self.min_pace if min_delay is None else min_delay
        high = self.max_pace if max_delay is None else max_delay
        time.sleep(random.uniform(low, high))
        if self.page:
            self.page.wait_for_load_state("domcontentloaded")

    def close(self) -> None:
        """Disconnect this client only — the launcher owns the browser/profile.

        The session's references are cleared first and the driver is stopped
        even when closing the browser connection raises; that error still
        propagates to the caller.
        """
        browser, playwright = self._browser, self._playwright
        self.page = self.context = self._browser = self._playwright = None
        try:
            if browser:
                browser.close()
        finally:
            if playwright:
                playwright.stop()

    def __repr__(self) -> str:
        return f"linkedin-cli-session:{self.name or self.endpoint}"
File without changes
@@ -0,0 +1,25 @@
1
+ """Discover the logged-in member's own LinkedIn profile (the messaging mailbox)."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+
6
+ from linkedin_cli.api.client import PlaywrightLinkedinAPI
7
+ from linkedin_cli.exceptions import AuthenticationError
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def discover_self_profile(session) -> dict:
    """Fetch the logged-in member's own profile via Voyager (``me``).

    Pure platform read — no persistence. Makes sure the session's browser is
    up, then asks the API client for the profile with public identifier
    ``"me"``.

    Returns:
        The parsed profile dict, which carries at least ``public_identifier``,
        ``urn``, and ``full_name``.

    Raises:
        AuthenticationError: if the API returns no profile (expired/blocked
            session).
    """
    session.ensure_browser()
    client = PlaywrightLinkedinAPI(session=session)
    profile, _raw = client.get_profile(public_identifier="me")
    if profile:
        logger.info("Self-profile discovered: %s", profile.get("public_identifier"))
        return profile
    raise AuthenticationError("Could not fetch own profile via Voyager API")
@@ -0,0 +1,30 @@
1
+ from typing import Optional
2
+ from urllib.parse import quote, urlparse, unquote
3
+
4
+
5
def url_to_public_id(url: str) -> Optional[str]:
    """Extract the public identifier from a LinkedIn profile URL.

    Strict extractor:
    - the first path segment MUST be ``in``;
    - the second segment is returned, percent-decoded;
    - ``None`` is returned for empty input, non-profile URLs, and profile
      URLs whose identifier segment is empty (e.g. ``/in//detail/``).

    The query string and fragment are ignored.
    """
    if not url:
        return None

    segments = urlparse(url.strip()).path.strip("/").split("/")
    if len(segments) < 2 or segments[0] != "in":
        return None

    # An empty identifier is "no identifier", not a valid empty one.
    return unquote(segments[1]) or None
23
+
24
+
25
def public_id_to_url(public_id: str) -> str:
    """Convert a public identifier back to a clean LinkedIn profile URL.

    Leading and trailing slashes are stripped and the identifier is fully
    percent-encoded. Returns ``""`` when there is no identifier to encode:
    empty/``None`` input, or input consisting only of slashes.
    """
    if not public_id:
        return ""
    slug = public_id.strip("/")
    if not slug:
        # Nothing left after stripping; avoid emitting ".../in//".
        return ""
    return f"https://www.linkedin.com/in/{quote(slug, safe='')}/"