closed-linkedin-api 2.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,233 @@
1
+ """
2
+ Cookie-based authentication for LinkedIn API.
3
+
4
+ This module provides utilities to authenticate with LinkedIn using
5
+ browser cookies instead of username/password credentials.
6
+ """
7
+
8
+ from typing import Dict, Optional, Union
9
+ from requests.cookies import RequestsCookieJar
10
+ from http.cookiejar import Cookie
11
+ import time
12
+
13
+
14
class CookieAuthenticationError(Exception):
    """Signals that building or reading cookie-based credentials failed."""
17
+
18
+
19
class CookieAuthenticator:
    """
    Helper class to create and validate LinkedIn session cookies.

    LinkedIn authentication requires at minimum two cookies:
    - li_at: The main authentication token
    - JSESSIONID: Session ID (used to derive csrf-token header)

    Example usage:
        # From a dict
        cookies = CookieAuthenticator.from_dict({
            'li_at': 'your_li_at_value',
            'JSESSIONID': '"ajax:1234567890"'
        })

        # From separate values
        cookies = CookieAuthenticator.from_li_at_and_jsessionid(
            li_at='your_li_at_value',
            jsessionid='"ajax:1234567890"'
        )
    """

    REQUIRED_COOKIES = ['li_at', 'JSESSIONID']
    OPTIONAL_COOKIES = ['li_rm', 'bcookie', 'bscookie', 'lidc', 'li_gc', 'liap']
    LINKEDIN_DOMAIN = '.linkedin.com'
    # Lifetime, in seconds, given to cookies created without an explicit
    # expiry (one year).
    DEFAULT_LIFETIME_SECONDS = 365 * 24 * 60 * 60

    @staticmethod
    def _is_blank(value) -> bool:
        """
        Tell whether a cookie value carries no usable content.

        :param value: Candidate cookie value (may be None)
        :return: True if the value is falsy, or consists only of whitespace
                 and/or double quotes
        """
        return not value or not str(value).strip().strip('"').strip()

    @staticmethod
    def _normalize_jsessionid(value: str) -> str:
        """
        Return a JSESSIONID value trimmed and wrapped in double quotes.

        A quote is only added on a side that does not already have one, so
        an already-quoted value is returned unchanged (apart from trimming).

        :param value: Raw JSESSIONID value, with or without quotes
        :return: The value with a leading and trailing double quote
        """
        value = value.strip()
        if not value.startswith('"'):
            value = f'"{value}'
        if not value.endswith('"'):
            value = f'{value}"'
        return value

    @staticmethod
    def _create_cookie(
        name: str,
        value: str,
        domain: str = '.linkedin.com',
        path: str = '/',
        expires: Optional[int] = None,
        secure: bool = True,
    ) -> Cookie:
        """
        Create a Cookie object with the given parameters.

        :param name: Cookie name
        :param value: Cookie value
        :param domain: Cookie domain (default: .linkedin.com)
        :param path: Cookie path (default: /)
        :param expires: Expiration as a Unix timestamp (default: None, which
                        means one year from now, NOT a session cookie)
        :param secure: Whether cookie is secure (default: True)
        :return: Cookie object
        """
        # Only None triggers the default; an explicit 0 is kept as-is.
        if expires is None:
            expires = int(time.time()) + CookieAuthenticator.DEFAULT_LIFETIME_SECONDS

        return Cookie(
            version=0,
            name=name,
            value=value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=domain.startswith('.'),
            path=path,
            path_specified=True,
            secure=secure,
            expires=expires,
            discard=False,
            comment=None,
            comment_url=None,
            rest={'HttpOnly': ''},
            rfc2109=False,
        )

    @classmethod
    def from_dict(cls, cookies: Dict[str, str]) -> RequestsCookieJar:
        """
        Convert a dict of cookie name->value pairs to a RequestsCookieJar.

        Every entry of the dict is added to the jar under the LinkedIn
        domain; the JSESSIONID value is trimmed and wrapped in double quotes
        if it is not already quoted.

        :param cookies: Dictionary with cookie names as keys and values as values.
                        Must contain at least 'li_at' and 'JSESSIONID'.
        :return: RequestsCookieJar configured for LinkedIn
        :raises CookieAuthenticationError: If required cookies are missing or
                                           blank (empty, whitespace-only or
                                           quotes-only)
        """
        missing = [
            name for name in cls.REQUIRED_COOKIES
            if cls._is_blank(cookies.get(name))
        ]
        if missing:
            raise CookieAuthenticationError(
                f"Missing required cookies: {', '.join(missing)}. "
                f"Required cookies are: {', '.join(cls.REQUIRED_COOKIES)}"
            )

        jar = RequestsCookieJar()
        for name, value in cookies.items():
            if name == 'JSESSIONID':
                value = cls._normalize_jsessionid(value)
            jar.set_cookie(
                cls._create_cookie(
                    name=name,
                    value=value,
                    domain=cls.LINKEDIN_DOMAIN,
                )
            )
        return jar

    @classmethod
    def from_li_at_and_jsessionid(
        cls,
        li_at: str,
        jsessionid: str,
        additional_cookies: Optional[Dict[str, str]] = None,
    ) -> RequestsCookieJar:
        """
        Create a RequestsCookieJar from the two required cookie values.

        :param li_at: The li_at authentication token value
        :param jsessionid: The JSESSIONID value (with or without surrounding quotes)
        :param additional_cookies: Optional dict of additional cookies to include.
                                   Entries named 'li_at' or 'JSESSIONID' replace
                                   the values passed as arguments.
        :return: RequestsCookieJar configured for LinkedIn
        :raises CookieAuthenticationError: If li_at or jsessionid is empty or blank
        """
        if cls._is_blank(li_at):
            raise CookieAuthenticationError("li_at cookie value cannot be empty")
        if cls._is_blank(jsessionid):
            raise CookieAuthenticationError("JSESSIONID cookie value cannot be empty")

        cookies = {
            'li_at': li_at,
            'JSESSIONID': jsessionid,
        }
        if additional_cookies:
            cookies.update(additional_cookies)

        return cls.from_dict(cookies)

    @classmethod
    def validate_cookies(cls, cookies: Union[Dict[str, str], RequestsCookieJar]) -> bool:
        """
        Check if the provided cookies contain all required values.

        Uses the same blank-value rule as from_dict, so a value made only of
        whitespace and/or quotes counts as missing.

        :param cookies: Either a dict or RequestsCookieJar to validate
        :return: True if all required cookies are present with non-blank values
        """
        if isinstance(cookies, RequestsCookieJar):
            cookie_dict = {c.name: c.value for c in cookies}
        else:
            cookie_dict = cookies

        return not any(
            cls._is_blank(cookie_dict.get(required))
            for required in cls.REQUIRED_COOKIES
        )

    @classmethod
    def get_csrf_token(cls, cookies: Union[Dict[str, str], RequestsCookieJar]) -> str:
        """
        Extract the CSRF token from cookies.

        LinkedIn uses the JSESSIONID value (with quotes stripped) as the csrf-token.

        :param cookies: Either a dict or RequestsCookieJar containing JSESSIONID
        :return: The csrf-token value
        :raises CookieAuthenticationError: If JSESSIONID is not found or empty
        """
        if isinstance(cookies, RequestsCookieJar):
            # First JSESSIONID in iteration order wins.
            jsessionid = next(
                (c.value for c in cookies if c.name == 'JSESSIONID'),
                None,
            )
        else:
            jsessionid = cookies.get('JSESSIONID')

        if not jsessionid:
            raise CookieAuthenticationError("JSESSIONID cookie not found")

        return jsessionid.strip('"')

    @classmethod
    def is_cookie_expired(
        cls,
        cookies: Union[Dict[str, str], RequestsCookieJar],
        cookie_name: str = 'li_at',
    ) -> bool:
        """
        Check if a specific cookie has expired.

        Note: This only works for RequestsCookieJar objects that contain
        expiration information. Dict-based cookies are assumed to be valid.
        Only the first cookie with a matching name is examined.

        :param cookies: Cookies to check
        :param cookie_name: Name of cookie to check (default: li_at)
        :return: True if the cookie is expired or not present in the jar,
                 False otherwise (including cookies without an expiry)
        """
        if isinstance(cookies, dict):
            # Can't determine expiration from a plain dict
            return False

        now = time.time()
        for cookie in cookies:
            if cookie.name == cookie_name:
                # Compare against None explicitly: an expiry of 0 (the epoch)
                # is a real, long-past timestamp rather than "no expiry".
                return cookie.expires is not None and cookie.expires < now

        # Cookie not found
        return True
@@ -0,0 +1,70 @@
1
+ import os
2
+ import pickle
3
+ import time
4
+ import open_linkedin_api.settings as settings
5
+ from requests.cookies import RequestsCookieJar
6
+ from typing import Optional
7
+
8
+
9
class Error(Exception):
    """Common ancestor for the exceptions defined in this module."""
13
+
14
+
15
class LinkedinSessionExpired(Error):
    """Raised when cached cookies exist but fail the token validity check."""
17
+
18
+
19
class CookieRepository(object):
    """
    Class to act as a repository for the cookies.

    Each user's cookie jar is pickled to ``<cookies_dir>/<username>.jr``.

    TODO: refactor to use http.cookiejar.FileCookieJar
    """

    def __init__(self, cookies_dir=None):
        """
        :param cookies_dir: Directory holding the pickled cookie jars. Any
                            falsy value (including the default) selects
                            ``settings.COOKIE_PATH``.
        """
        # The default is resolved here rather than in the signature so that
        # settings.COOKIE_PATH is read at construction time.
        self.cookies_dir = cookies_dir or settings.COOKIE_PATH

    def save(self, cookies, username):
        """
        Pickle ``cookies`` to the cache file of ``username``.

        The cookie directory is created first if it does not exist.

        :param cookies: Cookie jar (any picklable object) to store
        :param username: Name used to derive the cache file name
        """
        self._ensure_cookies_dir()
        with open(self._get_cookies_filepath(username), "wb") as f:
            pickle.dump(cookies, f)

    def get(self, username: str) -> Optional[RequestsCookieJar]:
        """
        Return the cached cookies of ``username``.

        :param username: Name used to derive the cache file name
        :return: The cached cookie jar, or None if no cache file exists
        :raises LinkedinSessionExpired: If cached cookies exist but the
                                        JSESSIONID validity check fails
        """
        cookies = self._load_cookies_from_cache(username)
        if cookies and not CookieRepository._is_token_still_valid(cookies):
            raise LinkedinSessionExpired

        return cookies

    def _ensure_cookies_dir(self):
        """Create the cookie directory (and parents) if it is missing."""
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.cookies_dir, exist_ok=True)

    def _get_cookies_filepath(self, username) -> str:
        """
        Return the path of the cookiejar file for a given username.

        The path is ``cookies_dir`` joined with ``<username>.jr``; joining
        (rather than concatenating) keeps the file inside the directory even
        when ``cookies_dir`` has no trailing separator.
        """
        return os.path.join(self.cookies_dir, "{}.jr".format(username))

    def _load_cookies_from_cache(self, username: str) -> Optional[RequestsCookieJar]:
        """
        Unpickle the cached cookies of ``username``.

        :return: The unpickled object, or None if the cache file is missing
        """
        cookiejar_filepath = self._get_cookies_filepath(username)
        try:
            with open(cookiejar_filepath, "rb") as f:
                # SECURITY: pickle.load executes arbitrary code embedded in
                # the file. Only safe while the cookie directory is writable
                # solely by trusted users.
                return pickle.load(f)
        except FileNotFoundError:
            return None

    @staticmethod
    def _is_token_still_valid(cookiejar: RequestsCookieJar):
        """
        Check the expiry of the first non-empty JSESSIONID cookie.

        :param cookiejar: Iterable of cookie objects
        :return: True only if that cookie has an expiry in the future; False
                 if it is expired, has no expiry, or no such cookie exists
        """
        _now = time.time()
        for cookie in cookiejar:
            if cookie.name == "JSESSIONID" and cookie.value:
                # Only the first match decides the outcome.
                return bool(cookie.expires and cookie.expires > _now)

        return False
@@ -0,0 +1,237 @@
1
+ """
2
+ GraphQL abstraction layer for LinkedIn Voyager API.
3
+
4
+ This module provides a registry of known GraphQL query IDs and a client
5
+ for executing GraphQL requests against LinkedIn's Voyager API.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from enum import Enum
10
+ from typing import Any, Dict, Optional
11
+ from urllib.parse import quote
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class QueryType(Enum):
    """Families of GraphQL endpoint a query can be sent to."""

    # Served from /voyager/api/graphql
    VOYAGER = "voyager"

    # Served from /voyager/api/voyagerMessagingGraphQL/graphql
    MESSAGING = "messaging"
21
+
22
+
23
@dataclass(frozen=True)
class GraphQLQuery:
    """Immutable record describing one LinkedIn GraphQL operation."""

    # Identifier of the operation on the LinkedIn side.
    query_id: str

    # Endpoint family that serves this operation.
    query_type: QueryType

    # Short human-readable summary of what the operation fetches.
    description: str
29
+
30
+
31
class QueryRegistry:
    """
    Catalogue of the LinkedIn GraphQL query IDs this package knows about.

    The IDs were discovered through network traffic analysis; LinkedIn uses
    fixed query IDs that map to specific GraphQL operations. Each attribute
    is a GraphQLQuery built from (query_id, query_type, description).
    """

    # --- Profile (Voyager GraphQL) ---------------------------------------
    PROFILE = GraphQLQuery(
        "voyagerIdentityDashProfiles.2ca312bdbe80fac72fd663a3e06a83e7",
        QueryType.VOYAGER,
        "Fetch full profile data by vanity name",
    )
    PROFILE_CARDS = GraphQLQuery(
        "voyagerIdentityDashProfileCards.55af784c21dc8640b500ab5b45937064",
        QueryType.VOYAGER,
        "Fetch profile cards/sections",
    )
    PROFILE_COMPONENTS = GraphQLQuery(
        "voyagerIdentityDashProfileComponents.7af5d6f176f11583b382e37e5639e69e",
        QueryType.VOYAGER,
        "Fetch profile components (experience, education, etc.)",
    )

    # --- Feed (Voyager GraphQL) ------------------------------------------
    MAIN_FEED = GraphQLQuery(
        "voyagerFeedDashMainFeed.923020905727c01516495a0ac90bb475",
        QueryType.VOYAGER,
        "Fetch main feed posts",
    )

    # --- Settings (Voyager GraphQL) --------------------------------------
    MY_SETTINGS = GraphQLQuery(
        "voyagerDashMySettings.7ea6de345b41dfb57b660a9a4bebe1b8",
        QueryType.VOYAGER,
        "Fetch user settings",
    )

    # --- Messaging (Messaging GraphQL) -----------------------------------
    CONVERSATIONS = GraphQLQuery(
        "messengerConversations.0d5e6781bbee71c3e51c8843c6519f48",
        QueryType.MESSAGING,
        "Fetch list of conversations",
    )
    MESSAGES = GraphQLQuery(
        "messengerMessages.5846eeb71c981f11e0134cb6626cc314",
        QueryType.MESSAGING,
        "Fetch messages in a conversation",
    )
    MAILBOX_COUNTS = GraphQLQuery(
        "messengerMailboxCounts.fc528a5a81a76dff212a4a3d2d48e84b",
        QueryType.MESSAGING,
        "Fetch unread message counts",
    )

    # --- Organization / Company (Voyager GraphQL) ------------------------
    ORGANIZATION_COMPANIES = GraphQLQuery(
        "voyagerOrganizationDashCompanies.148b1aebfadd0a455f32806df656c3c1",
        QueryType.VOYAGER,
        "Fetch company data by universal name",
    )
    ORGANIZATION_PAGE_UPDATES = GraphQLQuery(
        "voyagerFeedDashOrganizationalPageUpdates.827e11d165078dd7a5afaf1cba734121",
        QueryType.VOYAGER,
        "Fetch company/organization page posts",
    )

    # --- Social / Reactions (Voyager GraphQL) ----------------------------
    REACTIONS = GraphQLQuery(
        "voyagerSocialDashReactions.41ebf31a9f4c4a84e35a49d5abc9010b",
        QueryType.VOYAGER,
        "Fetch post reactions with reactor profile data",
    )
110
+
111
+
112
class GraphQLClient:
    """
    Client for executing LinkedIn GraphQL requests.

    This class handles the construction of GraphQL URLs and provides
    a clean interface for making GraphQL requests.
    """

    # Base URLs for different GraphQL endpoints (relative to API_BASE_URL which is /voyager/api)
    VOYAGER_GRAPHQL_URL = "/graphql"
    MESSAGING_GRAPHQL_URL = "/voyagerMessagingGraphQL/graphql"

    def __init__(self, session, api_base_url: str):
        """
        Initialize the GraphQL client.

        :param session: requests.Session object with authentication cookies
        :param api_base_url: Base URL for the LinkedIn API
        """
        self.session = session
        self.api_base_url = api_base_url

    def _get_endpoint(self, query_type: QueryType) -> str:
        """
        Get the appropriate endpoint URL for a query type.

        :param query_type: Endpoint family of the query
        :return: Endpoint path relative to the API base URL
        :raises ValueError: If the query type is not a known QueryType member
        """
        if query_type == QueryType.VOYAGER:
            return self.VOYAGER_GRAPHQL_URL
        if query_type == QueryType.MESSAGING:
            return self.MESSAGING_GRAPHQL_URL
        raise ValueError(f"Unknown query type: {query_type}")

    @staticmethod
    def _format_value(value: Any) -> str:
        """
        Render one variable value in LinkedIn's variables syntax.

        Booleans become ``true``/``false``, strings are emitted verbatim,
        dicts become ``(k:v,...)``, lists become ``List(a,b,...)`` and
        anything else falls back to ``str(value)``.
        """
        # bool must be tested before the generic fallback: str(True) would
        # give "True", not the lowercase form used in the URL.
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, str):
            return value
        if isinstance(value, dict):
            inner = ",".join(
                f"{k}:{GraphQLClient._format_value(v)}" for k, v in value.items()
            )
            return f"({inner})"
        if isinstance(value, list):
            items = ",".join(GraphQLClient._format_value(v) for v in value)
            return f"List({items})"
        # Numbers and every other type share the same rendering.
        return str(value)

    def _build_variables_string(self, variables: Dict[str, Any]) -> str:
        """
        Build a LinkedIn-style variables string from a dictionary.

        LinkedIn uses a custom format for GraphQL variables that looks like:
        (key1:value1,key2:value2,key3:(nestedKey:nestedValue))

        :param variables: Dictionary of variables
        :return: Formatted variables string (``()`` for an empty dict)
        """
        # The top level is rendered exactly like a nested dict.
        return self._format_value(dict(variables))

    def build_url(
        self,
        query: GraphQLQuery,
        variables: Optional[Dict[str, Any]] = None,
        include_web_metadata: bool = False
    ) -> str:
        """
        Build the GraphQL URL for a query, relative to the API base URL.

        Parameters are emitted in the order ``variables``, ``queryId``,
        ``includeWebMetadata``; ``variables`` is omitted when none are given.

        :param query: The GraphQL query to execute
        :param variables: Variables to pass to the query
        :param include_web_metadata: Whether to include web metadata in response
        :return: Endpoint path plus query string for the GraphQL request
        :raises ValueError: If the query's type has no known endpoint
        """
        endpoint = self._get_endpoint(query.query_type)

        params = []
        if variables:
            variables_str = self._build_variables_string(variables)
            # Keep the structural characters of the variables syntax
            # unescaped; everything else is percent-encoded.
            params.append(f"variables={quote(variables_str, safe='(),:-')}")
        params.append(f"queryId={query.query_id}")
        if include_web_metadata:
            params.append("includeWebMetadata=true")

        return f"{endpoint}?{'&'.join(params)}"

    def execute(
        self,
        query: GraphQLQuery,
        variables: Optional[Dict[str, Any]] = None,
        include_web_metadata: bool = False,
        headers: Optional[Dict[str, str]] = None,
        timeout: Optional[float] = None
    ) -> Dict[str, Any]:
        """
        Execute a GraphQL query and return the response.

        :param query: The GraphQL query to execute
        :param variables: Variables to pass to the query
        :param include_web_metadata: Whether to include web metadata
        :param headers: Additional headers to include in the request; they
                        override the default ``accept`` header on conflict
        :param timeout: Optional timeout forwarded to ``session.get``. When
                        None (default) no timeout argument is passed, so the
                        session's own behavior applies.
        :return: JSON response from the API
        :raises: requests.HTTPError on request failure
        """
        full_url = f"{self.api_base_url}{self.build_url(query, variables, include_web_metadata)}"

        request_headers = {
            "accept": "application/vnd.linkedin.normalized+json+2.1"
        }
        if headers:
            request_headers.update(headers)

        # Lazy %-style arguments: the message is only formatted when DEBUG
        # logging is actually enabled.
        logger.debug("Executing GraphQL query: %s", query.query_id)
        logger.debug("URL: %s", full_url)

        request_kwargs = {"headers": request_headers}
        if timeout is not None:
            request_kwargs["timeout"] = timeout

        response = self.session.get(full_url, **request_kwargs)
        response.raise_for_status()

        return response.json()