linkedin-agent-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkedin_agent_cli-0.1.0.dist-info/METADATA +197 -0
- linkedin_agent_cli-0.1.0.dist-info/RECORD +34 -0
- linkedin_agent_cli-0.1.0.dist-info/WHEEL +4 -0
- linkedin_agent_cli-0.1.0.dist-info/entry_points.txt +2 -0
- linkedin_agent_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- linkedin_cli/__init__.py +9 -0
- linkedin_cli/actions/__init__.py +0 -0
- linkedin_cli/actions/connect.py +118 -0
- linkedin_cli/actions/conversations.py +132 -0
- linkedin_cli/actions/message.py +153 -0
- linkedin_cli/actions/profile.py +22 -0
- linkedin_cli/actions/search.py +186 -0
- linkedin_cli/actions/status.py +112 -0
- linkedin_cli/api/__init__.py +0 -0
- linkedin_cli/api/client.py +182 -0
- linkedin_cli/api/messaging/__init__.py +11 -0
- linkedin_cli/api/messaging/conversations.py +56 -0
- linkedin_cli/api/messaging/send.py +74 -0
- linkedin_cli/api/messaging/utils.py +24 -0
- linkedin_cli/api/voyager.py +319 -0
- linkedin_cli/auth.py +98 -0
- linkedin_cli/browser/__init__.py +0 -0
- linkedin_cli/browser/login.py +140 -0
- linkedin_cli/browser/nav.py +115 -0
- linkedin_cli/cli.py +396 -0
- linkedin_cli/conf.py +33 -0
- linkedin_cli/enums.py +11 -0
- linkedin_cli/exceptions.py +47 -0
- linkedin_cli/launcher.py +60 -0
- linkedin_cli/page_state.py +148 -0
- linkedin_cli/session.py +169 -0
- linkedin_cli/setup/__init__.py +0 -0
- linkedin_cli/setup/self_profile.py +25 -0
- linkedin_cli/url_utils.py +30 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# linkedin_cli/api/messaging/conversations.py
|
|
2
|
+
"""Retrieve conversations and messages via Voyager Messaging GraphQL API."""
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
6
|
+
|
|
7
|
+
from linkedin_cli.api.client import PlaywrightLinkedinAPI
|
|
8
|
+
from linkedin_cli.api.messaging.utils import encode_urn, check_response
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
_GRAPHQL_BASE = "https://www.linkedin.com/voyager/api/voyagerMessagingGraphQL/graphql"
|
|
13
|
+
_CONVERSATIONS_QUERY_ID = "messengerConversations.0d5e6781bbee71c3e51c8843c6519f48"
|
|
14
|
+
_MESSAGES_QUERY_ID = "messengerMessages.5846eeb71c981f11e0134cb6626cc314"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _graphql_headers(api: PlaywrightLinkedinAPI) -> dict:
|
|
18
|
+
headers = {**api.headers}
|
|
19
|
+
headers["accept"] = "application/graphql"
|
|
20
|
+
return headers
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@retry(
    retry=retry_if_exception_type(IOError),
    wait=wait_exponential(multiplier=2, min=2, max=30),
    stop=stop_after_attempt(3),
    reraise=True,
)
def fetch_conversations(api: PlaywrightLinkedinAPI, mailbox_urn: str) -> dict:
    """Fetch the conversation list for *mailbox_urn* and return the raw JSON.

    The call is attempted up to three times with exponential backoff whenever
    an ``IOError`` is raised; the last error is re-raised unchanged.
    """
    # The URN is percent-encoded before being embedded in the variables tuple.
    variables = f"(mailboxUrn:{encode_urn(mailbox_urn)})"
    url = f"{_GRAPHQL_BASE}?queryId={_CONVERSATIONS_QUERY_ID}&variables={variables}"
    response = api.get(url, headers=_graphql_headers(api))
    check_response(response, "fetch_conversations")
    return response.json()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@retry(
    retry=retry_if_exception_type(IOError),
    wait=wait_exponential(multiplier=2, min=2, max=30),
    stop=stop_after_attempt(3),
    reraise=True,
)
def fetch_messages(api: PlaywrightLinkedinAPI, conversation_urn: str) -> dict:
    """Fetch the messages of *conversation_urn* and return the raw JSON.

    The call is attempted up to three times with exponential backoff whenever
    an ``IOError`` is raised; the last error is re-raised unchanged.
    """
    # The URN is percent-encoded before being embedded in the variables tuple.
    variables = f"(conversationUrn:{encode_urn(conversation_urn)})"
    url = f"{_GRAPHQL_BASE}?queryId={_MESSAGES_QUERY_ID}&variables={variables}"
    response = api.get(url, headers=_graphql_headers(api))
    check_response(response, "fetch_messages")
    return response.json()
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# linkedin_cli/api/messaging/send.py
|
|
2
|
+
"""Send messages via Voyager Messaging API."""
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import uuid
|
|
7
|
+
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
8
|
+
|
|
9
|
+
from linkedin_cli.api.client import PlaywrightLinkedinAPI
|
|
10
|
+
from linkedin_cli.api.messaging.utils import check_response
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=2, max=30),
    retry=retry_if_exception_type(IOError),
    reraise=True,
)
def send_message(
    api: PlaywrightLinkedinAPI,
    conversation_urn: str,
    message_text: str,
    mailbox_urn: str,
) -> dict:
    """Send a message via Voyager Messaging API.

    Args:
        api: Authenticated PlaywrightLinkedinAPI instance.
        conversation_urn: e.g. "urn:li:msg_conversation:(urn:li:fsd_profile:XXX,2-threadId)"
        message_text: The message body.
        mailbox_urn: Sender's profile URN.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        IOError: re-raised after three failed attempts (see the ``retry``
            decorator); other exceptions propagate immediately.
    """
    # NOTE(review): the retry wrapper re-runs this whole function, so every
    # attempt gets a fresh originToken/trackingId while
    # dedupeByClientGeneratedToken is False. Presumably a retry after a
    # server-side success could deliver the message twice — confirm.
    origin_token = str(uuid.uuid4())
    tracking_id = os.urandom(16).hex()

    payload = {
        "message": {
            "body": {
                "attributes": [],
                "text": message_text,
            },
            "renderContentUnions": [],
            "conversationUrn": conversation_urn,
            "originToken": origin_token,
        },
        "mailboxUrn": mailbox_urn,
        "trackingId": tracking_id,
        "dedupeByClientGeneratedToken": False,
    }

    url = (
        "https://www.linkedin.com/voyager/api"
        "/voyagerMessagingDashMessengerMessages?action=createMessage"
    )

    headers = {**api.headers}
    headers["accept"] = "application/json"
    headers["content-type"] = "text/plain;charset=UTF-8"

    logger.debug("Voyager send_message → %s", conversation_urn)

    res = api.post(url, headers=headers, data=json.dumps(payload))
    check_response(res, "send_message")

    data = res.json()
    # The request has already succeeded here, so a missing or null "value"
    # must not turn a delivered message into an exception for the caller.
    value = data.get("value") if isinstance(data, dict) else None
    delivered_at = value.get("deliveredAt") if isinstance(value, dict) else None
    logger.info("Message delivered → %s (at %s)", conversation_urn, delivered_at)
    return data
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# linkedin_cli/api/messaging/utils.py
|
|
2
|
+
"""Shared helpers for messaging API modules."""
|
|
3
|
+
import logging
|
|
4
|
+
from urllib.parse import quote
|
|
5
|
+
|
|
6
|
+
from linkedin_cli.exceptions import AuthenticationError
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def encode_urn(urn: str) -> str:
    """Percent-encode *urn* for embedding in Voyager GraphQL variables."""
    # safe="" makes quote() escape "/" as well, which it leaves alone by default.
    encoded = quote(urn, safe="")
    return encoded
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def check_response(res, context: str) -> None:
    """Raise if a Voyager messaging response signals failure.

    Args:
        res: Response object exposing ``status``, ``ok`` and ``text()``.
        context: Short label of the calling operation, included in the message.

    Raises:
        AuthenticationError: on HTTP 401.
        IOError: on 403/404 (status only) and on any other non-OK response
            (status plus the first 500 characters of the body).
    """
    status = res.status
    if status == 401:
        raise AuthenticationError(f"Messaging API 401 ({context})")
    if status in (403, 404):
        raise IOError(f"Messaging API {status} ({context})")
    if res.ok:
        return
    raise IOError(f"Messaging API {res.status} ({context}): {res.text()[:500]}")
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
# linkedin_cli/api/voyager.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field, asdict
|
|
5
|
+
from typing import List, Optional, Dict, Literal, Any
|
|
6
|
+
|
|
7
|
+
# Raw distance strings accepted for ``LinkedInProfile.connection_distance``.
ConnectionDistance = Literal["DISTANCE_1", "DISTANCE_2", "DISTANCE_3", "OUT_OF_NETWORK", None]

# Numeric connection degree for each raw distance string; out-of-network
# members have no degree.
DISTANCE_TO_DEGREE: Dict[str, Optional[int]] = dict(
    DISTANCE_1=1,
    DISTANCE_2=2,
    DISTANCE_3=3,
    OUT_OF_NETWORK=None,
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ======================
|
|
18
|
+
# Internal dataclasses (structure only — dataclasses do not type-check values)
|
|
19
|
+
# ======================
|
|
20
|
+
|
|
21
|
+
@dataclass
class Date:
    """A year/month pair; either component may be absent."""

    # Both parts default to None so a partially specified date can be built.
    year: Optional[int] = None
    month: Optional[int] = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class DateRange:
    """A start/end pair of ``Date`` values; either bound may be absent."""

    # An omitted bound is represented as None.
    start: Optional[Date] = None
    end: Optional[Date] = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class Position:
    """One work-experience entry of a profile."""

    # Required fields.
    title: str
    company_name: str

    # Optional details; None when not supplied.
    company_urn: Optional[str] = None
    location: Optional[str] = None
    date_range: Optional[DateRange] = None
    description: Optional[str] = None
    urn: Optional[str] = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class Education:
    """One education entry of a profile."""

    # Required field.
    school_name: str

    # Optional details; None when not supplied.
    degree_name: Optional[str] = None
    field_of_study: Optional[str] = None
    date_range: Optional[DateRange] = None
    urn: Optional[str] = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class LinkedInProfile:
    """Structured view of a parsed profile.

    The dataclass only gives the data a fixed shape: constructing it fails
    when a required field is missing, but field values are not type-checked.
    """

    # Required identity fields.
    url: str
    urn: str
    # The parser in this module passes None when both name parts are empty,
    # so the annotation is Optional rather than plain ``str``.
    full_name: Optional[str]
    first_name: str
    last_name: str

    # Free-text and location details.
    headline: Optional[str] = None
    summary: Optional[str] = None
    public_identifier: Optional[str] = None
    location_name: Optional[str] = None
    geo: Optional[Dict[str, Any]] = None
    industry: Optional[Dict[str, Any]] = None

    # Each instance gets its own list (default_factory), never a shared one.
    positions: List[Position] = field(default_factory=list)
    educations: List[Education] = field(default_factory=list)

    country_code: Optional[str] = None
    supported_locales: List[str] = field(default_factory=list)

    # Relationship to the viewer: raw distance string and numeric degree.
    connection_distance: Optional[ConnectionDistance] = None
    connection_degree: Optional[int] = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# ======================
|
|
79
|
+
# Private helpers
|
|
80
|
+
# ======================
|
|
81
|
+
|
|
82
|
+
def _resolve_references(data: dict) -> Dict[str, dict]:
|
|
83
|
+
"""Build urn → entity lookup from 'included' array."""
|
|
84
|
+
return {
|
|
85
|
+
entity.get("entityUrn"): entity
|
|
86
|
+
for entity in data.get("included", [])
|
|
87
|
+
if entity.get("entityUrn")
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _resolve_star_field(entity: dict, urn_map: Dict[str, dict], field_name: str) -> Any:
|
|
92
|
+
"""Resolve *company, *school, *elements, etc."""
|
|
93
|
+
value = entity.get(field_name)
|
|
94
|
+
if not value:
|
|
95
|
+
return None
|
|
96
|
+
if isinstance(value, list):
|
|
97
|
+
return [urn_map.get(urn) for urn in value if urn_map.get(urn)]
|
|
98
|
+
return urn_map.get(value)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _date_from_raw(raw: Optional[dict]) -> Optional[Date]:
|
|
102
|
+
if not raw:
|
|
103
|
+
return None
|
|
104
|
+
return Date(year=raw.get("year"), month=raw.get("month"))
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _date_range_from_raw(raw: Optional[dict]) -> Optional[DateRange]:
|
|
108
|
+
if not raw:
|
|
109
|
+
return None
|
|
110
|
+
return DateRange(
|
|
111
|
+
start=_date_from_raw(raw.get("start")),
|
|
112
|
+
end=_date_from_raw(raw.get("end")),
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _enrich_position(pos: dict, urn_map: Dict[str, dict]) -> Position:
    """Build a ``Position`` from a raw position entity.

    The company is taken from the entity referenced by ``*company`` when it
    resolves; otherwise (or when that entity lacks the value) the inline
    ``companyName`` / ``companyUrn`` fields of *pos* are used.
    """
    company = _resolve_star_field(pos, urn_map, "*company")
    # Anything other than a single resolved entity is treated as unresolved,
    # so the ``.get`` calls below are always made on a dict.
    if not isinstance(company, dict):
        company = {}

    return Position(
        title=pos.get("title") or "Unknown Title",
        # ``or`` chaining (as for ``title``) keeps the name a non-empty string
        # even when a key is present but null.
        company_name=company.get("name") or pos.get("companyName") or "Unknown Company",
        company_urn=company.get("entityUrn") or pos.get("companyUrn"),
        location=pos.get("locationName"),
        date_range=_date_range_from_raw(pos.get("dateRange")),
        description=pos.get("description"),
        urn=pos.get("entityUrn"),
    )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _enrich_education(edu: dict, urn_map: Dict[str, dict]) -> Education:
    """Build an ``Education`` from a raw education entity.

    The school name is taken from the entity referenced by ``*school`` when
    it resolves and has a name; otherwise the inline ``schoolName`` of *edu*
    is used, with ``"Unknown School"`` as the last resort.
    """
    school = _resolve_star_field(edu, urn_map, "*school")
    # Anything other than a single resolved entity is treated as unresolved.
    if not isinstance(school, dict):
        school = {}

    return Education(
        # ``or`` chaining keeps the name a non-empty string even when a key
        # is present but null.
        school_name=school.get("name") or edu.get("schoolName") or "Unknown School",
        degree_name=edu.get("degreeName"),
        field_of_study=edu.get("fieldOfStudy"),
        date_range=_date_range_from_raw(edu.get("dateRange")),
        urn=edu.get("entityUrn"),
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _degree_from_union(union: dict) -> tuple[Optional[str], Optional[int]]:
|
|
143
|
+
"""Extract (distance_str, degree) from a memberRelationshipUnion/Data dict."""
|
|
144
|
+
if any(k in union for k in ("connectedMember", "connected", "*connection", "connection")):
|
|
145
|
+
return "DISTANCE_1", 1
|
|
146
|
+
|
|
147
|
+
if "noConnection" in union:
|
|
148
|
+
distance_str = union["noConnection"].get("memberDistance")
|
|
149
|
+
degree = DISTANCE_TO_DEGREE.get(distance_str)
|
|
150
|
+
return distance_str, degree
|
|
151
|
+
|
|
152
|
+
return None, None
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _extract_connection_info(profile_entity: dict, urn_map: Dict[str, dict]) -> tuple[Optional[str], Optional[int]]:
|
|
156
|
+
member_rel_urn = profile_entity.get("*memberRelationship")
|
|
157
|
+
if not member_rel_urn:
|
|
158
|
+
return None, None
|
|
159
|
+
|
|
160
|
+
rel = urn_map.get(member_rel_urn)
|
|
161
|
+
if not rel:
|
|
162
|
+
return None, None
|
|
163
|
+
|
|
164
|
+
union = rel.get("memberRelationshipUnion") or rel.get("memberRelationshipData")
|
|
165
|
+
if not union:
|
|
166
|
+
return None, None
|
|
167
|
+
|
|
168
|
+
return _degree_from_union(union)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def parse_connection_degree(json_response: dict) -> Optional[int]:
    """Return the first connection degree found among the included entities.

    Scans ``included`` for MemberRelationship entities directly, so it does
    not rely on a profile entity pointing at them via ``*memberRelationship``.
    Entities whose union yields no degree are skipped; None is returned when
    nothing usable is found.
    """
    relationship_type = "com.linkedin.voyager.dash.relationships.MemberRelationship"
    for candidate in json_response.get("included", []):
        if candidate.get("$type") == relationship_type:
            union = candidate.get("memberRelationshipUnion") or candidate.get("memberRelationshipData")
            if union:
                degree = _degree_from_union(union)[1]
                if degree is not None:
                    return degree
    return None
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# ======================
|
|
191
|
+
# Public function – returns plain dict
|
|
192
|
+
# ======================
|
|
193
|
+
|
|
194
|
+
def _pick_profile_entity(
    json_response: dict,
    urn_map: Dict[str, dict],
    public_identifier: Optional[str],
) -> Optional[dict]:
    """Choose the Profile entity to parse, or None when the response has none."""
    profile_type = "com.linkedin.voyager.dash.identity.profile.Profile"
    first_profile = None
    for entity in json_response.get("included") or []:
        if entity.get("$type") != profile_type:
            continue
        if public_identifier is not None:
            # An exact match on the requested identifier wins immediately.
            if entity.get("publicIdentifier") == public_identifier:
                return entity
        elif any("FullProfile" in recipe for recipe in entity.get("$recipeTypes") or []):
            # Without a filter, prefer an entity decorated as a full profile.
            return entity
        if first_profile is None:
            # Remember the first Profile entity as the lenient fallback.
            first_profile = entity
    if first_profile:
        return first_profile

    # Last resort: the first URN listed under data.*elements.
    elements = (json_response.get("data") or {}).get("*elements") or []
    return urn_map.get(elements[0]) if elements else None


def _resolve_collection(urn_map: Dict[str, dict], collection_urn: Optional[str]) -> List[dict]:
    """Return the resolved ``*elements`` of the collection entity at *collection_urn*."""
    collection = urn_map.get(collection_urn) if collection_urn else None
    if not collection:
        return []
    resolved = (urn_map.get(urn) for urn in collection.get("*elements") or [])
    return [entity for entity in resolved if entity]


def parse_linkedin_voyager_response(
    json_response: dict,
    public_identifier: Optional[str] = None,
) -> dict:
    """Parse a full LinkedIn Voyager profile response into a plain dict.

    A ``LinkedInProfile`` dataclass is used internally to give the result a
    fixed shape; the return value is ``asdict`` of it, so nested positions,
    educations and date ranges are plain dicts as well.

    Args:
        json_response: Raw JSON from the Voyager API (with "data" and "included").
        public_identifier: Preferred profile. An entity with this
            ``publicIdentifier`` is used when present; otherwise the first
            Profile entity in the response is used.

    Returns:
        dict with the structured profile data.

    Raises:
        ValueError: if no profile entity can be found in the response.
        KeyError: if the chosen profile entity has no ``entityUrn``.
    """
    urn_map = _resolve_references(json_response)

    profile_entity = _pick_profile_entity(json_response, urn_map, public_identifier)
    if not profile_entity:
        raise ValueError("Could not find profile entity in the Voyager response")

    # ``or ""`` also covers keys that are present but null, which would
    # otherwise render as the string "None" in full_name.
    first_name = profile_entity.get("firstName") or ""
    last_name = profile_entity.get("lastName") or ""

    connection_distance, connection_degree = _extract_connection_info(profile_entity, urn_map)

    # Positions are nested two levels deep: position groups → positions.
    positions: List[Position] = [
        _enrich_position(pos, urn_map)
        for group in _resolve_collection(urn_map, profile_entity.get("*profilePositionGroups"))
        for pos in _resolve_collection(urn_map, group.get("*profilePositionInPositionGroup"))
    ]

    educations: List[Education] = [
        _enrich_education(edu, urn_map)
        for edu in _resolve_collection(urn_map, profile_entity.get("*profileEducations"))
    ]

    # Resolve geo — try direct *geo first, then nested geoLocation.*geo
    geo_entity = _resolve_star_field(profile_entity, urn_map, "*geo")
    if not geo_entity:
        geo_location = profile_entity.get("geoLocation")
        if geo_location:
            geo_urn = geo_location.get("*geo") or geo_location.get("geoUrn")
            if geo_urn:
                geo_entity = urn_map.get(geo_urn)

    location_name = profile_entity.get("locationName")
    if not location_name and geo_entity:
        location_name = geo_entity.get("defaultLocalizedName")

    # A null "location" must not break the countryCode lookup.
    country_code = (profile_entity.get("location") or {}).get("countryCode")

    supported_raw = profile_entity.get("supportedLocales") or []
    supported_locales = [loc.get("language") for loc in supported_raw if loc.get("language")]

    public_id = profile_entity.get("publicIdentifier")

    # Building the dataclass fixes the field set; values are not type-checked.
    profile_obj = LinkedInProfile(
        urn=profile_entity["entityUrn"],
        first_name=first_name,
        last_name=last_name,
        full_name=f"{first_name} {last_name}".strip() or None,
        headline=profile_entity.get("headline"),
        summary=profile_entity.get("summary"),
        public_identifier=public_id,
        location_name=location_name,
        geo=geo_entity,
        industry=_resolve_star_field(profile_entity, urn_map, "*industry"),
        country_code=country_code,
        supported_locales=supported_locales,
        url=f"https://www.linkedin.com/in/{public_id or ''}/",
        positions=positions,
        educations=educations,
        connection_distance=connection_distance,
        connection_degree=connection_degree,
    )

    # asdict converts nested dataclasses too, so the result is plain data.
    return asdict(profile_obj)
|
linkedin_cli/auth.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Drive a LinkedIn browser session to the authenticated feed.
|
|
2
|
+
|
|
3
|
+
The auth flow is declared, not coded: each step is a ``@auth_flow.transition``
|
|
4
|
+
action annotated with the page state it runs *from* and the states it may legally
|
|
5
|
+
*produce*. The generic :meth:`PageFlow.run` loop (in ``page_state``) does the
|
|
6
|
+
driving — observe the live page, dispatch to the action for that state, repeat
|
|
7
|
+
until the feed. There is no hand-written loop or dispatch table here.
|
|
8
|
+
|
|
9
|
+
Both the standalone CLI (``linkedin-cli login``) and the daemon
|
|
10
|
+
(``linkedin/browser/launch.py``) call :func:`authenticate`, so the two share one
|
|
11
|
+
enforced flow instead of hand-rolling their own login sequences.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
|
|
17
|
+
from termcolor import colored
|
|
18
|
+
|
|
19
|
+
from linkedin_cli.browser.login import (
|
|
20
|
+
LINKEDIN_LOGIN_URL,
|
|
21
|
+
await_checkpoint_clear,
|
|
22
|
+
submit_login_form,
|
|
23
|
+
)
|
|
24
|
+
from linkedin_cli.exceptions import (
|
|
25
|
+
AuthenticationError,
|
|
26
|
+
CheckpointChallengeError,
|
|
27
|
+
IllegalPageTransition,
|
|
28
|
+
)
|
|
29
|
+
from linkedin_cli.page_state import PageFlow, PageState
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
LINKEDIN_FEED_URL = "https://www.linkedin.com/feed/"
|
|
34
|
+
|
|
35
|
+
auth_flow = PageFlow("auth", goal=PageState.FEED)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@auth_flow.transition(
    when=PageState.UNKNOWN,
    then={PageState.LOGIN, PageState.FEED, PageState.AUTHWALL, PageState.CHECKPOINT},
)
def _from_unknown(session) -> None:
    """Unknown page: navigate to the feed URL and wait for the DOM to load.

    Where LinkedIn actually lands us is left to the declared ``then`` states.
    """
    page = session.page
    page.goto(LINKEDIN_FEED_URL)
    page.wait_for_load_state("domcontentloaded")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@auth_flow.transition(when=PageState.AUTHWALL, then={PageState.LOGIN})
def _from_authwall(session) -> None:
    """Guest authwall: navigate to the login URL and wait for the DOM to load."""
    page = session.page
    page.goto(LINKEDIN_LOGIN_URL)
    page.wait_for_load_state("domcontentloaded")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@auth_flow.transition(when=PageState.LOGIN, then={PageState.FEED, PageState.CHECKPOINT})
def _from_login(session) -> None:
    """Login form → submit credentials.

    Landing back on the login page (rejected credentials) is outside the declared
    ``then`` and so raises — which also enforces the never-resubmit rule: every
    credential resubmit hardens LinkedIn's block, so we try exactly once.

    Raises:
        AuthenticationError: if the session carries no username or no password.
    """
    username = getattr(session, "username", None)
    password = getattr(session, "password", None)
    # Both credentials are required: the form is submitted exactly once, so
    # an attempt with a missing password would waste that single try.
    if not username or not password:
        raise AuthenticationError(
            "Not logged in and no LINKEDIN_USERNAME/LINKEDIN_PASSWORD provided"
        )
    submit_login_form(session, username, password)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@auth_flow.transition(when=PageState.CHECKPOINT, then={PageState.FEED})
def _from_checkpoint(session) -> None:
    """Checkpoint challenge: wait for it to be cleared in the live browser.

    Raises:
        CheckpointChallengeError: carrying the current page URL, when
            ``await_checkpoint_clear`` reports the challenge was not cleared.
    """
    cleared = await_checkpoint_clear(session.page)
    if cleared:
        return
    raise CheckpointChallengeError(session.page.url)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def authenticate(session, *, username=None, password=None) -> None:
    """Drive *session* to the authenticated feed, or raise.

    Any credential passed explicitly is stored on the session first; a
    credential left as None keeps whatever the session already carries. The
    ``auth_flow`` is then run against the session.

    Raises:
        AuthenticationError: when the flow raises ``IllegalPageTransition``
            (the original exception is chained as the cause), or when a flow
            step raises it directly.
        CheckpointChallengeError: propagated unchanged from the checkpoint step.
    """
    for attr, value in (("username", username), ("password", password)):
        if value is not None:
            setattr(session, attr, value)

    try:
        auth_flow.run(session)
    except IllegalPageTransition as exc:
        raise AuthenticationError(str(exc)) from exc

    banner = colored("Authenticated — on the feed", "green", attrs=["bold"])
    logger.info(banner)
|
|
File without changes
|