dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +33 -0
- dhisana/schemas/sales.py +224 -23
- dhisana/utils/add_mapping.py +72 -63
- dhisana/utils/apollo_tools.py +739 -109
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/cache_output_tools.py +23 -23
- dhisana/utils/check_email_validity_tools.py +456 -458
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +3 -1
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +585 -85
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +69 -16
- dhisana/utils/generate_email_response.py +298 -41
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +19 -6
- dhisana/utils/generate_linkedin_response_message.py +156 -65
- dhisana/utils/generate_structured_output_internal.py +351 -131
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +391 -25
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +771 -167
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +19 -16
- dhisana/utils/parse_linkedin_messages_txt.py +2 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +507 -206
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +121 -68
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +363 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +576 -0
- dhisana/utils/test_connect.py +1765 -92
- dhisana/utils/trasform_json.py +95 -16
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
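
The largest rewrite below is dhisana/utils/proxy_curl_tools.py (+507 -206): Proxycurl calls now target the enrichlayer.com API, and every failure path (missing credentials, 404, 429, transport exceptions) returns an {'error': ...} dict instead of raising aiohttp.ClientResponseError. A minimal sketch of how a caller might consume that contract follows; it is not taken from the package, and the tool_config payload is hypothetical, modeled on the "name"/"configuration" shape the old get_proxycurl_access_token docstring described. Setting PROXY_CURL_API_KEY in the environment also works, since the token lookup falls back to os.getenv.

import asyncio

from dhisana.utils.proxy_curl_tools import enrich_person_info_from_proxycurl

# Hypothetical credential wiring; the integration and key names here are illustrative.
TOOL_CONFIG = [{
    "name": "proxycurl",
    "configuration": [{"name": "apiKey", "value": "<PROXY_CURL_API_KEY>"}],
}]

async def main() -> None:
    result = await enrich_person_info_from_proxycurl(
        linkedin_url="https://www.linkedin.com/in/example/",
        tool_config=TOOL_CONFIG,
    )
    # In this version errors are returned, not raised, so check the dict first.
    if result.get("error"):
        print(f"Enrichment failed: {result['error']}")
    else:
        print(result.get("full_name"), result.get("occupation"))

asyncio.run(main())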
dhisana/utils/proxy_curl_tools.py

@@ -5,15 +5,12 @@ import os
 import re
 import aiohttp
 import backoff
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

-from bs4 import BeautifulSoup
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.cache_output_tools import cache_output, retrieve_output
 from dhisana.utils.clean_properties import cleanup_properties
-from dhisana.utils.
-from dhisana.utils.serpapi_search_tools import search_google
-from dhisana.utils.web_download_parse_tools import get_html_content_from_url
+from dhisana.utils.search_router import search_google_with_tools
 from urllib.parse import urlparse, urlunparse

 logging.basicConfig(level=logging.INFO)
@@ -24,16 +21,8 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     """
     Retrieves the PROXY_CURL_API_KEY access token from the provided tool configuration.

-    Args:
-        tool_config (list): A list of dictionaries containing the tool configuration.
-            Each dictionary should have a "name" key and a "configuration" key,
-            where "configuration" is a list of dictionaries containing "name" and "value" keys.
-
-    Returns:
-        str: The PROXY_CURL_API_KEY access token.
-
     Raises:
-        ValueError: If the
+        ValueError: If the Proxycurl integration has not been configured.
     """
     PROXY_CURL_API_KEY = None

@@ -58,8 +47,11 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     PROXY_CURL_API_KEY = PROXY_CURL_API_KEY or os.getenv("PROXY_CURL_API_KEY")

     if not PROXY_CURL_API_KEY:
-        logger.error("
-        raise ValueError(
+        logger.error("Proxycurl integration is not configured.")
+        raise ValueError(
+            "Proxycurl integration is not configured. Please configure the connection to Proxycurl in Integrations."
+        )
+
     return PROXY_CURL_API_KEY


@@ -76,34 +68,33 @@ async def enrich_person_info_from_proxycurl(
     email: Optional[str] = None,
     phone: Optional[str] = None,
     tool_config: Optional[List[Dict]] = None
-):
+) -> Dict:
     """
     Fetch a person's details from Proxycurl using LinkedIn URL, email, or phone number.

-    Parameters:
-    - linkedin_url (str, optional): LinkedIn profile URL of the person.
-    - email (str, optional): Email address of the person.
-    - phone (str, optional): Phone number of the person.
-
     Returns:
-
+        dict: JSON response containing person information or an error.
     """
     logger.info("Entering enrich_person_info_from_proxycurl")

-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}
+
     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
     }

-    if not linkedin_url
-        logger.warning("No linkedin_url
-        return {'error': "
-
+    if not linkedin_url:
+        logger.warning("No linkedin_url provided.")
+        return {'error': "linkedin_url must be provided"}
+
     # Check cache if linkedin_url is provided
     if linkedin_url:
         cached_response = retrieve_output("enrich_person_info_from_proxycurl", linkedin_url)
-        if cached_response is not None:
+        if cached_response is not None and cached_response.get('error') is None:
             logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
             return cached_response

@@ -112,10 +103,13 @@ async def enrich_person_info_from_proxycurl(
         params['url'] = linkedin_url
     if email:
         params['email'] = email
+    else:
+        # Request Proxycurl to include personal emails when no email is provided
+        params['personal_email'] = 'include'
     if phone:
         params['phone'] = phone

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/profile'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -131,27 +125,21 @@ async def enrich_person_info_from_proxycurl(
                 elif response.status == 404:
                     msg = "Person not found"
                     logger.warning(msg)
-                    if linkedin_url:
-                        cache_output("enrich_person_info_from_proxycurl", linkedin_url, {'error': msg})
                     return {'error': msg}
                 elif response.status == 429:
                     msg = "Rate limit exceeded"
                     logger.warning(msg)
+                    # Sleep and then return an error (no raise)
                    await asyncio.sleep(30)
-
-                        request_info=response.request_info,
-                        history=response.history,
-                        status=response.status,
-                        message=msg,
-                        headers=response.headers
-                    )
+                    return {'error': msg}
                 else:
                     error_text = await response.text()
                     logger.error(f"Error from Proxycurl: {error_text}")
                     return {'error': error_text}
         except Exception as e:
             logger.exception("Exception occurred while fetching person info from Proxycurl.")
-
+            return {"error": str(e)}
+

 @assistant_tool
 @backoff.on_exception(
@@ -166,14 +154,24 @@ async def lookup_person_in_proxy_curl_by_name(
     last_name: str,
     company_name: Optional[str] = None,
     tool_config: Optional[List[Dict]] = None,
-):
+) -> Dict:
+    """
+    Look up a person in Proxycurl by first and last name, optionally a company name.
+
+    Returns:
+        dict: JSON response containing search results or an error.
+    """
     logger.info("Entering lookup_person_in_proxy_curl_by_name")

     if not first_name or not last_name:
         logger.warning("First name or last name missing for lookup.")
         return {'error': "Full name is required"}

-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}
+
     headers = {'Authorization': f'Bearer {API_KEY}'}
     params = {
         'first_name': first_name,
@@ -190,7 +188,7 @@ async def lookup_person_in_proxy_curl_by_name(
         logger.info(f"Cache hit for name lookup key: {key}")
         return cached_response

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/search/person'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -212,43 +210,109 @@ async def lookup_person_in_proxy_curl_by_name(
                     msg = "Rate limit exceeded"
                     logger.warning(msg)
                     await asyncio.sleep(30)
-
-                        request_info=response.request_info,
-                        history=response.history,
-                        status=response.status,
-                        message=msg,
-                        headers=response.headers
-                    )
+                    return {'error': msg}
                 else:
                     result = await response.json()
                     logger.warning(f"lookup_person_in_proxycurl_by_name error: {result}")
                     return {'error': result}
         except Exception as e:
             logger.exception("Exception occurred while looking up person by name.")
-
-
+            return {"error": str(e)}


 def transform_company_data(data: dict) -> dict:
     """
-    Transform the company data by mapping
-
+    Transform the company data by mapping:
+      - 'name' to 'organization_name'
+      - 'website' to 'organization_website'
+      - 'industry' to 'organization_industry'
+      - 'hq' or 'headquarters' to 'organization_hq_location'
+        in the format "city, state, country" (skipping empty parts).
+    Copies over all other properties except the ones that are mapped.
     If data is empty, returns an empty dictionary.
     """
     if not data:
         return {}
+
     transformed = {}
-
+
+    # Map name, website, and industry
     if "name" in data:
         transformed["organization_name"] = data["name"]
     if "website" in data:
         transformed["organization_website"] = data["website"]
-
+    if "industry" in data:
+        transformed["organization_industry"] = data["industry"]
+
+    if "company_size" in data:
+        transformed["company_size_list"] = data["company_size"]
+
+    if "company_size_on_linkedin" in data:
+        transformed["organization_size"] = data["company_size_on_linkedin"]
+        transformed["company_size"] = data["company_size_on_linkedin"]
+
+    # Determine headquarters info from "hq" or "headquarters"
+    hq_data = data.get("hq") or data.get("headquarters")
+    if hq_data:
+        if isinstance(hq_data, dict):
+            city = hq_data.get("city", "")
+            state = hq_data.get("geographic_area", "")
+            country = hq_data.get("country", "")
+            # Join non-empty parts with a comma and a space
+            parts = [part for part in (city, state, country) if part]
+            transformed["organization_hq_location"] = ", ".join(parts)
+        else:
+            # If hq_data is not a dict, assume it's already in the desired format
+            transformed["organization_hq_location"] = hq_data
+
+    # Copy all other properties, excluding those already mapped
     for key, value in data.items():
-        if key not in ("name", "website"):
+        if key not in ("name", "website", "industry", "hq", "headquarters", "company_size"):
             transformed[key] = value
+
     return transformed

+
+def _build_company_profile_params(
+    company_url: str,
+    profile_flags: Dict[str, Optional[str]],
+) -> Dict[str, str]:
+    """
+    Build request params for the Enrichlayer company profile endpoint,
+    ensuring we only forward flags that were explicitly provided.
+    """
+    params: Dict[str, str] = {'url': company_url}
+    for key, value in profile_flags.items():
+        if value is not None:
+            params[key] = value
+    return params
+
+
+def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
+    """
+    Builds a cache key that is unique for the combination of identifier
+    (LinkedIn URL or domain) and the optional enrichment flags.
+    """
+    suffix_bits = [
+        f"{key}={value}"
+        for key, value in sorted(profile_flags.items())
+        if value is not None
+    ]
+    if suffix_bits:
+        return f"{identifier}|{'&'.join(suffix_bits)}"
+    return identifier
+
+
+def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
+    """
+    Convert a boolean flag into the string literals expected by Proxycurl.
+    True -> "include", False -> "exclude", None -> None (omit parameter).
+    """
+    if value is None:
+        return None
+    return "include" if value else "exclude"
+
+
 @backoff.on_exception(
     backoff.expo,
     aiohttp.ClientResponseError,
@@ -259,22 +323,39 @@ def transform_company_data(data: dict) -> dict:
 async def enrich_organization_info_from_proxycurl(
     organization_domain: Optional[str] = None,
     organization_linkedin_url: Optional[str] = None,
-    tool_config: Optional[List[Dict]] = None
+    tool_config: Optional[List[Dict]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict:
     """
     Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
+    Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.

-
-
-
+    Args:
+        organization_domain: Organization's domain name to resolve via Proxycurl.
+        organization_linkedin_url: LinkedIn company profile URL.
+        tool_config: Optional tool configuration metadata for credential lookup.
+        categories/funding_data/exit_data/acquisitions/extra: Set True to request
+            "include", False for "exclude", or None to omit.
+        use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
+        fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").

     Returns:
-
-
+        dict: Transformed JSON response containing organization information,
+        or {'error': ...} on error, or empty dict if not found.
     """
     logger.info("Entering enrich_organization_info_from_proxycurl")

-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}
+
     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
@@ -284,9 +365,22 @@ async def enrich_organization_info_from_proxycurl(
         logger.warning("No organization domain or LinkedIn URL provided.")
         return {}

+    profile_flags: Dict[str, Optional[str]] = {
+        "categories": _bool_to_include_exclude(categories),
+        "funding_data": _bool_to_include_exclude(funding_data),
+        "exit_data": _bool_to_include_exclude(exit_data),
+        "acquisitions": _bool_to_include_exclude(acquisitions),
+        "extra": _bool_to_include_exclude(extra),
+        "use_cache": use_cache,
+        "fallback_to_cache": fallback_to_cache,
+    }
+
     # If LinkedIn URL is provided, standardize it and fetch data
     if organization_linkedin_url:
         logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
+        if "linkedin.com/company" not in organization_linkedin_url:
+            logger.warning("Invalid LinkedIn URL provided." + organization_linkedin_url)
+            return {}
         parsed_url = urlparse(organization_linkedin_url)
         if parsed_url.netloc != 'www.linkedin.com':
             standardized_netloc = 'www.linkedin.com'
@@ -303,19 +397,17 @@ async def enrich_organization_info_from_proxycurl(
         if standardized_url and not standardized_url.endswith('/'):
             standardized_url += '/'

+        cache_key = _build_company_cache_key(standardized_url, profile_flags)
         # Check cache for standardized LinkedIn URL
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl",
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
+            cached_response = transform_company_data(cached_response)
             return cached_response

         # Fetch details using standardized LinkedIn URL
-        url = 'https://
-        params =
-            'url': standardized_url,
-            'use_cache': 'if-present',
-            'fallback_to_cache': 'on-error',
-        }
+        url = 'https://enrichlayer.com/api/v2/company'
+        params = _build_company_profile_params(standardized_url, profile_flags)
         logger.debug(f"Making request to Proxycurl with params: {params}")

         async with aiohttp.ClientSession() as session:
@@ -325,26 +417,43 @@ async def enrich_organization_info_from_proxycurl(
                     if response.status == 200:
                         result = await response.json()
                         transformed_result = transform_company_data(result)
-                        cache_output("enrich_organization_info_from_proxycurl",
+                        cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
                         logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                         return transformed_result
+                    elif response.status == 429:
+                        msg = "Rate limit exceeded"
+                        logger.warning(msg)
+                        await asyncio.sleep(30)
+                        return {"error": msg}
+                    elif response.status == 404:
+                        error_text = await response.text()
+                        logger.warning(
+                            f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
+                        )
+                        cache_output(
+                            "enrich_organization_info_from_proxycurl", cache_key, {}
+                        )
+                        return {}
                     else:
                         error_text = await response.text()
-                        logger.error(
+                        logger.error(
+                            f"Error from Proxycurl organization info fetch by URL: {error_text}"
+                        )
                         return {}
             except Exception as e:
                 logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
-
+                return {"error": str(e)}

     # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
     if organization_domain:
         logger.debug(f"Organization domain provided: {organization_domain}")
-
+        domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization domain: {organization_domain}")
             return cached_response

-        resolve_url = 'https://
+        resolve_url = 'https://enrichlayer.com/api/v2/company/resolve'
         params = {'domain': organization_domain}
         logger.debug(f"Making request to Proxycurl to resolve domain with params: {params}")

@@ -368,23 +477,29 @@ async def enrich_organization_info_from_proxycurl(
                         else:
                             standardized_url = company_url

-                        profile_url = 'https://
+                        profile_url = 'https://enrichlayer.com/api/v2/company'
                         try:
-
+                            profile_params = _build_company_profile_params(standardized_url, profile_flags)
+                            async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
                                 logger.debug(f"Received profile response status: {profile_response.status}")
                                 if profile_response.status == 200:
                                     result = await profile_response.json()
                                     transformed_result = transform_company_data(result)
-                                    cache_output("enrich_organization_info_from_proxycurl",
+                                    cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
                                     logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                                     return transformed_result
+                                elif profile_response.status == 429:
+                                    msg = "Rate limit exceeded"
+                                    logger.warning(msg)
+                                    await asyncio.sleep(30)
+                                    return {"error": msg}
                                 else:
                                     error_text = await profile_response.text()
                                     logger.error(f"Error from Proxycurl organization profile fetch by resolved domain: {error_text}")
                                     return {}
                         except Exception as e:
                             logger.exception("Exception occurred while fetching organization profile data.")
-
+                            return {"error": str(e)}
                     else:
                         logger.warning("Company URL not found for the provided domain.")
                         return {}
@@ -392,17 +507,11 @@ async def enrich_organization_info_from_proxycurl(
                     msg = "Rate limit exceeded"
                     logger.warning(msg)
                     await asyncio.sleep(30)
-
-                        request_info=response.request_info,
-                        history=response.history,
-                        status=response.status,
-                        message=msg,
-                        headers=response.headers
-                    )
+                    return {"error": msg}
                 elif response.status == 404:
                     msg = "Item not found"
                     logger.warning(msg)
-                    cache_output("enrich_organization_info_from_proxycurl",
+                    cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
                     return {}
                 else:
                     error_text = await response.text()
@@ -410,7 +519,10 @@ async def enrich_organization_info_from_proxycurl(
                     return {}
         except Exception as e:
             logger.exception("Exception occurred while resolving organization domain on Proxycurl.")
-
+            return {"error": str(e)}
+
+    return {}
+

 @assistant_tool
 @backoff.on_exception(
@@ -423,19 +535,20 @@ async def enrich_organization_info_from_proxycurl(
 async def enrich_job_info_from_proxycurl(
     job_url: Optional[str] = None,
     tool_config: Optional[List[Dict]] = None
-):
+) -> Dict:
     """
     Fetch a job's details from Proxycurl using the job URL.

-    Parameters:
-    - job_url (str, optional): URL of the LinkedIn job posting.
-
     Returns:
-
+        dict: JSON response containing job information or error.
     """
     logger.info("Entering enrich_job_info_from_proxycurl")

-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}
+
     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
@@ -444,7 +557,7 @@ async def enrich_job_info_from_proxycurl(
     if not job_url:
         logger.warning("No job URL provided.")
         return {'error': "Job URL must be provided"}
-
+
     # Check cache
     cached_response = retrieve_output("enrich_job_info_from_proxycurl", job_url)
     if cached_response is not None:
@@ -452,7 +565,7 @@ async def enrich_job_info_from_proxycurl(
         return cached_response

     params = {'url': job_url}
-    api_endpoint = 'https://
+    api_endpoint = 'https://enrichlayer.com/api/v2/job'
     logger.debug(f"Making request to Proxycurl for job info with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -468,13 +581,7 @@ async def enrich_job_info_from_proxycurl(
                     msg = "Rate limit exceeded"
                     logger.warning(msg)
                     await asyncio.sleep(30)
-
-                        request_info=response.request_info,
-                        history=response.history,
-                        status=response.status,
-                        message=msg,
-                        headers=response.headers
-                    )
+                    return {'error': msg}
                 elif response.status == 404:
                     msg = "Job not found"
                     logger.warning(msg)
@@ -486,7 +593,7 @@ async def enrich_job_info_from_proxycurl(
                     return {'error': error_text}
         except Exception as e:
             logger.exception("Exception occurred while fetching job info from Proxycurl.")
-
+            return {"error": str(e)}


 @assistant_tool
@@ -506,23 +613,23 @@ async def search_recent_job_changes(
     """
     Search for individuals with specified job titles and locations who have recently changed jobs.

-    Parameters:
-    - job_titles (List[str]): List of job titles to search for.
-    - locations (List[str]): List of locations to search in.
-    - max_items_to_return (int, optional): Maximum number of items to return. Defaults to 100.
-
     Returns:
-
+        List[dict]: List of individuals matching the criteria, or empty list on failure/error.
     """
     logger.info("Entering search_recent_job_changes")

-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
     }

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/search/person'
     results = []
     page = 1
     per_page = min(max_items_to_return, 100)
@@ -558,18 +665,14 @@ async def search_recent_job_changes(
                     msg = "Rate limit exceeded"
                     logger.warning(msg)
                     await asyncio.sleep(30)
-
-
-
-                        status=response.status,
-                        message=msg,
-                        headers=response.headers
-                    )
+                    # Without raising, won't trigger another backoff retry
+                    # so just continue or break as desired:
+                    continue
                 else:
                     error_text = await response.text()
                     logger.error(f"Error while searching recent job changes: {error_text}")
                     break
-        except Exception
+        except Exception:
             logger.exception("Exception occurred while searching recent job changes.")
             break

@@ -585,18 +688,11 @@ async def find_matching_job_posting_proxy_curl(
     tool_config: Optional[List[Dict]] = None
 ) -> List[str]:
     """
-    Find job postings on LinkedIn for a given company using Google Custom Search
-
+    Find job postings on LinkedIn for a given company using Google Custom Search,
+    then optionally validate those links with Proxycurl.

-    Args:
-        company_name (str): The name of the company.
-        keywords_check (List[str]): A list of keywords to include in the search.
-        optional_keywords (List[str]): A list of optional keywords to include in the search.
-        organization_linkedin_url (Optional[str]): The LinkedIn URL of the company.
-        tool_config (Optional[List[Dict]]): Proxycurl tool configuration.
-
     Returns:
-        List[str]: A list of job posting links.
+        List[str]: A list of matching job posting links.
     """
     logger.info("Entering find_matching_job_posting_proxy_curl")

@@ -622,11 +718,11 @@ async def find_matching_job_posting_proxy_curl(
     logger.debug(f"Google search query: {query}")

     # First Google search attempt
-    results = await
+    results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
     if not isinstance(results, list) or len(results) == 0:
         logger.info("No results found. Attempting fallback query without optional keywords.")
         query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str}'
-        results = await
+        results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
     if not isinstance(results, list) or len(results) == 0:
         logger.info("No job postings found in fallback search either.")
         return job_posting_links
@@ -655,18 +751,14 @@ async def find_matching_job_posting_proxy_curl(

         # Use Proxycurl to enrich job info
         logger.debug(f"Fetching job info from Proxycurl for link: {link}")
-
-
-
-            logger.exception("Exception occurred while enriching job info from Proxycurl.")
-            continue
-
-        if not json_result:
-            logger.debug("No job info returned; skipping.")
+        json_result = await enrich_job_info_from_proxycurl(link, tool_config=tool_config)
+        if not json_result or 'error' in json_result:
+            logger.debug("No valid job info returned; skipping.")
             continue

         text = json.dumps(json_result).lower()

+        # If the user gave an organization_linkedin_url, check if it matches
         company_match = False
         if organization_linkedin_url and json_result.get('company', {}):
             result_url = json_result.get('company', {}).get('url', '').lower()
@@ -685,43 +777,45 @@ async def find_matching_job_posting_proxy_curl(
     logger.info(f"Found {len(job_posting_links)} matching job postings.")
     return job_posting_links

+
 def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -> dict:
     """
     If input_user_properties has a non-empty value for a field, keep it.
     Otherwise, use that field from person_data.
     """

-    # Helper function to determine if a property is considered "empty"
     def is_empty(value):
         # Checks for None, empty string, or string with only whitespace
         return value is None or (isinstance(value, str) and not value.strip())

-    # Email
+    # Email - use first personal email if input is empty
     if is_empty(input_user_properties.get("email")):
-
+        personal_emails = person_data.get("personal_emails")
+        if isinstance(personal_emails, list) and personal_emails:
+            input_user_properties["email"] = personal_emails[0]

     # Phone
     if is_empty(input_user_properties.get("phone")):
         input_user_properties["phone"] = person_data.get("contact", {}).get("sanitized_phone", "")

     # Full name
-    if
+    if person_data.get("full_name"):
         input_user_properties["full_name"] = person_data["full_name"]

     # First name
-    if
+    if person_data.get("first_name"):
         input_user_properties["first_name"] = person_data["first_name"]

     # Last name
-    if
+    if person_data.get("last_name"):
         input_user_properties["last_name"] = person_data["last_name"]

     # Occupation -> job_title
-    if
+    if person_data.get("occupation"):
         input_user_properties["job_title"] = person_data["occupation"]

     # Headline
-    if
+    if person_data.get("headline"):
         input_user_properties["headline"] = person_data["headline"]

     # Summary
@@ -732,11 +826,9 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
     experiences = person_data.get("experiences", [])
     if experiences:
         # Current role data
-
-
-        input_user_properties["organization_name"] = experiences[0].get("company", "")
+
+        input_user_properties["organization_name"] = experiences[0].get("company", "")

-        # Organization Linkedin URL
         org_url = experiences[0].get("company_linkedin_profile_url", "")
         if org_url and is_empty(input_user_properties.get("organization_linkedin_url")):
             input_user_properties["organization_linkedin_url"] = org_url
@@ -752,27 +844,39 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
     if is_empty(input_user_properties.get("previous_organization_name")):
         input_user_properties["previous_organization_name"] = previous_org.get("company", "")

-    # Combine city/state if available (and if lead_location is empty)
+    # Combine city/state if available (and if lead_location is empty); avoid literal "None"
     if is_empty(input_user_properties.get("lead_location")):
-
-
-
+        city = person_data.get("city")
+        state = person_data.get("state")
+        parts = []
+        for value in (city, state):
+            if value is None:
+                continue
+            s = str(value).strip()
+            if not s or s.lower() == "none":
+                continue
+            parts.append(s)
+        if parts:
+            input_user_properties["lead_location"] = ", ".join(parts)
+
+    # LinkedIn Followers Count
+    if is_empty(input_user_properties.get("linkedin_follower_count")):
+        input_user_properties["linkedin_follower_count"] = person_data.get("follower_count", 0)

     return input_user_properties


+
 async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_config: Optional[List[Dict]] = None) -> dict:
     """
-    Enriches the user info (input_user_properties) with data from Proxycurl
-
-
-
-    Args:
-        input_user_properties (dict): Dictionary with user details (e.g. LinkedIn URL, email, names).
-        tool_config (Optional[List[Dict]]): Proxycurl tool configuration.
+    Enriches the user info (input_user_properties) with data from Proxycurl.
+    If the user_linkedin_url is determined to be a proxy (acw* and length > 10),
+    we skip calling enrich_person_info_from_proxycurl, keep the input as-is,
+    and only perform the organization enrichment logic.

     Returns:
-        dict: Updated input_user_properties with enriched data
+        dict: Updated input_user_properties with enriched data or
+        with an error field if something goes wrong.
     """
     logger.info("Entering enrich_user_info_with_proxy_curl")

@@ -784,23 +888,76 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
     email = input_user_properties.get("email", "")
     user_data_from_proxycurl = None

-    logger.debug(
-        f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'"
-    )
+    logger.debug(f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'")

-    #
-    if
-
-
-
-
+    # ---------------------------------------------------------------
+    # 1) Detect if the LinkedIn URL is a "proxy" URL (acw + length > 10)
+    # ---------------------------------------------------------------
+    def is_proxy_linkedin_url(url: str) -> bool:
+        """
+        Checks if the LinkedIn URL has an /in/<profile_id> path
+        that starts with 'acw' and has length > 10, indicating a proxy.
+        """
+        match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
+        if match:
+            profile_id = match.group(1)
+            if profile_id.startswith("acw") and len(profile_id) > 10:
+                return True
+        return False
+
+    if is_proxy_linkedin_url(linkedin_url):
+        logger.info("The LinkedIn URL appears to be a proxy URL. Skipping user data enrichment from Proxycurl.")
+        # We do NOT call enrich_person_info_from_proxycurl for user data.
+        # We just set linkedin_url_match = False and enrich organization info if possible:
+        input_user_properties["linkedin_url_match"] = False
+
+        # Attempt organization enrichment if we have an organization_linkedin_url:
+        company_data = {}
+        if input_user_properties.get("organization_linkedin_url"):
+            company_data = await enrich_organization_info_from_proxycurl(
+                organization_linkedin_url=input_user_properties["organization_linkedin_url"],
                 tool_config=tool_config
             )
-            if
+            if company_data and not company_data.get("error"):
+                if company_data.get("organization_linkedin_url"):
+                    input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+                if company_data.get("organization_name"):
+                    input_user_properties["organization_name"] = company_data.get("organization_name", "")
+                input_user_properties["organization_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["company_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+                input_user_properties["industry"] = company_data.get("organization_industry", "")
+                input_user_properties["organization_revenue"] = ""
+
+        # Always clean & store any returned org info:
+        additional_props = input_user_properties.get("additional_properties") or {}
+        company_data = cleanup_properties(company_data)
+        additional_props["pc_company_data"] = json.dumps(company_data)
+        input_user_properties["additional_properties"] = additional_props
+
+        logger.info("Returning after skipping user enrichment for proxy URL.")
+        return input_user_properties
+
+    # ----------------------------------------------------------------
+    # 2) If not proxy, proceed with normal user enrichment logic
+    # ----------------------------------------------------------------
+    if linkedin_url or email:
+        user_data = await enrich_person_info_from_proxycurl(
+            linkedin_url=linkedin_url,
+            email=email,
+            tool_config=tool_config
+        )
+        if not user_data or 'error' in user_data:
+            logger.warning("No valid person data found by LinkedIn or email.")
+        else:
+            user_data_from_proxycurl = user_data
+            if linkedin_url:
                 logger.info(f"User data found for LinkedIn URL: {linkedin_url}")
                 input_user_properties["user_linkedin_url"] = linkedin_url
-    except Exception as e:
-        logger.exception("Exception occurred while enriching person info by LinkedIn or email.")
     else:
         # Otherwise, fallback to name-based lookup
         first_name = input_user_properties.get("first_name", "")
@@ -811,7 +968,8 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
         if full_name:
             name_parts = full_name.split(" ", 1)
             first_name = first_name or name_parts[0]
-
+            if len(name_parts) > 1:
+                last_name = last_name or name_parts[1]

         if not full_name:
             full_name = f"{first_name} {last_name}".strip()
@@ -820,14 +978,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
         logger.debug(f"Looking up person by name: {first_name} {last_name}, company: {company}")

         if first_name and last_name:
-
-
-
-
-
-
-
-
+            lookup_result = await lookup_person_in_proxy_curl_by_name(
+                first_name=first_name,
+                last_name=last_name,
+                company_name=company,
+                tool_config=tool_config
+            )
+            # Expecting a dict (search_result)
+            if lookup_result and not lookup_result.get('error'):
+                results = lookup_result.get("results", [])
                 person_company = ""
                 for person in results:
                     linkedin_profile_url = person.get("linkedin_profile_url", "")
@@ -836,7 +995,7 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
                         linkedin_url=linkedin_profile_url,
                         tool_config=tool_config
                     )
-                    if data_from_proxycurl:
+                    if data_from_proxycurl and not data_from_proxycurl.get('error'):
                         person_name = data_from_proxycurl.get("name", "").lower()
                         person_first_name = data_from_proxycurl.get("first_name", "").lower()
                         person_last_name = data_from_proxycurl.get("last_name", "").lower()
@@ -846,7 +1005,7 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
                             if exp_company == company.lower():
                                 person_company = exp_company
                                 break
-
+
                         if (
                             (person_name == full_name.lower() or
                              (person_first_name == first_name.lower() and person_last_name == last_name.lower()))
@@ -856,16 +1015,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
                             input_user_properties["user_linkedin_url"] = linkedin_profile_url
                             user_data_from_proxycurl = data_from_proxycurl
                             break
-        except Exception as e:
-            logger.exception("Exception occurred while looking up person by name.")
-            pass

     if not user_data_from_proxycurl:
         logger.debug("No user data returned from Proxycurl.")
         input_user_properties["linkedin_url_match"] = False
         return input_user_properties

-    #
+    # ------------------------------------------------------------------
+    # 3) If user data was found, sanitize & fill user properties
+    # ------------------------------------------------------------------
     url_pattern = re.compile(r'(https?://[^\s]+)', re.IGNORECASE)

     def sanitize_urls_in_data(data):
@@ -890,13 +1048,13 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

     person_data = sanitize_urls_in_data(user_data_from_proxycurl)
     additional_props = input_user_properties.get("additional_properties") or {}
-
+
     # Check if there's a match on first/last name
-    first_matched =
+    first_matched = (
         input_user_properties.get("first_name")
         and person_data.get("first_name") == input_user_properties["first_name"]
     )
-    last_matched =
+    last_matched = (
         input_user_properties.get("last_name")
         and person_data.get("last_name") == input_user_properties["last_name"]
     )
@@ -904,24 +1062,167 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
     if first_matched and last_matched:
         input_user_properties["linkedin_url_match"] = True
         input_user_properties["linkedin_validation_status"] = "valid"
-

     input_user_properties = fill_in_missing_properties(input_user_properties, person_data)
-
-
-
-
-
-
-
+
+    # ------------------------------------------------------------------
+    # 4) Attempt organization enrichment if we have an org LinkedIn URL
+    # ------------------------------------------------------------------
+    company_data = {}
+    if input_user_properties.get("organization_linkedin_url"):
+        company_data = await enrich_organization_info_from_proxycurl(
+            organization_linkedin_url=input_user_properties["organization_linkedin_url"],
+            tool_config=tool_config
+        )
+        if company_data and not company_data.get("error"):
+            if company_data.get("organization_linkedin_url"):
+                input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+            if company_data.get("organization_name"):
+                input_user_properties["organization_name"] = company_data.get("organization_name", "")
+            input_user_properties["organization_size"] = str(
+                company_data.get("company_size_on_linkedin", "")
+            )
+            input_user_properties["company_size"] = str(
+                company_data.get("company_size_on_linkedin", "")
+            )
+            input_user_properties["company_size_list"] = company_data.get("company_size", "")
+            input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+            input_user_properties["industry"] = company_data.get("organization_industry", "")
+            input_user_properties["organization_revenue"] = ""

     person_data = cleanup_properties(person_data)
-
     additional_props["pc_person_data"] = json.dumps(person_data)
-
+
     company_data = cleanup_properties(company_data)
     additional_props["pc_company_data"] = json.dumps(company_data)
     input_user_properties["additional_properties"] = additional_props

-
     logger.info("Enrichment of user info with Proxycurl complete.")
     return input_user_properties
+
+
+
+
+
+@assistant_tool
+async def find_leads_by_job_openings_proxy_curl(
+    query_params: Dict[str, Any],
+    hiring_manager_roles: List[str],
+    tool_config: Optional[List[Dict]] = None,
+) -> List[Dict]:
+    """Search LinkedIn job postings using Proxycurl and find hiring manager leads.
+
+    Args:
+        query_params: Dictionary of parameters to Proxycurl job search API. The
+            key ``job_title`` is required. Other keys like ``location`` may also
+            be supplied.
+        hiring_manager_roles: List of job titles to lookup at the company for
+            potential hiring managers.
+        tool_config: Optional configuration containing Proxycurl credentials.
+
+    Returns:
+        A list of lead dictionaries with normalized keys such as
+        ``first_name``, ``last_name``, ``user_linkedin_url``,
+        ``organization_name``, and ``organization_linkedin_url``.
+    """
+    logger.info("Entering find_leads_by_job_openings_proxy_curl")
+
+    if not isinstance(query_params, dict) or not query_params.get("job_title"):
+        logger.warning("query_params must include 'job_title'")
+        return []
+
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    job_search_url = "https://enrichlayer.com/api/v2/company/job"
+    leads: List[Dict] = []
+
+    # ------------------------------------------------------------------
+    # 1) Look up job openings
+    # ------------------------------------------------------------------
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(job_search_url, headers=headers, params=query_params) as resp:
+                if resp.status == 200:
+                    job_result = await resp.json()
+                    jobs = job_result.get("results") or job_result.get("jobs") or []
+                elif resp.status == 429:
+                    logger.warning("Rate limit exceeded on job search")
+                    await asyncio.sleep(30)
+                    return []
+                else:
+                    error_text = await resp.text()
+                    logger.error("Job search error %s: %s", resp.status, error_text)
+                    return []
+    except Exception:
+        logger.exception("Exception while searching jobs on Proxycurl")
+        return []
+
+    # ------------------------------------------------------------------
+    # 2) For each job, find leads for specified hiring manager roles
+    # ------------------------------------------------------------------
+    for job in jobs:
+        company = job.get("company", {}) if isinstance(job, dict) else {}
+        company_name = company.get("name", "")
+        company_url = company.get("url", "")
+        if not company_name:
+            continue
+
+        for role in hiring_manager_roles:
+            employee_params = {
+                "url": company_url,
+                "role_search": role,
+                "employment_status": "current",
+                "page_size": 1,
+            }
+            employees = []
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(
+                        "https://enrichlayer.com/api/v2/company/employees",
+                        headers=headers,
+                        params=employee_params,
+                    ) as e_resp:
+                        if e_resp.status == 200:
+                            data = await e_resp.json()
+                            employees = data.get("employees") or data.get("profiles") or []
+                        elif e_resp.status == 429:
+                            logger.warning("Rate limit exceeded while fetching employees")
+                            await asyncio.sleep(30)
+                            continue
+            except Exception:
+                logger.exception("Exception while fetching employees from Proxycurl")
+                continue
+
+            for emp in employees:
+                profile_url = emp.get("linkedin_profile_url") or emp.get("profile_url")
+                if not profile_url:
+                    continue
+                person = await enrich_person_info_from_proxycurl(
+                    linkedin_url=profile_url, tool_config=tool_config
+                )
+                if not person or person.get("error"):
+                    continue
+                lead = {
+                    "first_name": person.get("first_name", ""),
+                    "last_name": person.get("last_name", ""),
+                    "full_name": person.get("full_name", ""),
+                    "user_linkedin_url": profile_url,
+                    "job_title": person.get("occupation", role),
+                    "organization_name": company_name,
+                    "organization_linkedin_url": company_url,
+                }
+                cleaned = cleanup_properties(lead)
+                if cleaned:
+                    leads.append(cleaned)
+
+    logger.info("Returning %d leads from Proxycurl job search", len(leads))
+    return leads