dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +10 -1
- dhisana/schemas/sales.py +203 -22
- dhisana/utils/add_mapping.py +0 -2
- dhisana/utils/apollo_tools.py +739 -119
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/check_email_validity_tools.py +35 -18
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +1 -4
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +174 -35
- dhisana/utils/enrich_lead_information.py +183 -53
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +68 -23
- dhisana/utils/generate_email_response.py +294 -46
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +9 -2
- dhisana/utils/generate_linkedin_response_message.py +137 -66
- dhisana/utils/generate_structured_output_internal.py +317 -164
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +278 -54
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +718 -272
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +8 -6
- dhisana/utils/parse_linkedin_messages_txt.py +1 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +377 -76
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +3 -3
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +360 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +178 -18
- dhisana/utils/test_connect.py +1603 -130
- dhisana/utils/trasform_json.py +3 -3
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxy_curl_tools.py

@@ -5,15 +5,12 @@ import os
 import re
 import aiohttp
 import backoff
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

-from bs4 import BeautifulSoup
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.cache_output_tools import cache_output, retrieve_output
 from dhisana.utils.clean_properties import cleanup_properties
-from dhisana.utils.
-from dhisana.utils.serpapi_search_tools import search_google
-from dhisana.utils.web_download_parse_tools import get_html_content_from_url
+from dhisana.utils.search_router import search_google_with_tools
 from urllib.parse import urlparse, urlunparse

 logging.basicConfig(level=logging.INFO)
@@ -23,9 +20,9 @@ logger = logging.getLogger(__name__)
 def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     """
     Retrieves the PROXY_CURL_API_KEY access token from the provided tool configuration.
-
-
-
+
+    Raises:
+        ValueError: If the Proxycurl integration has not been configured.
     """
     PROXY_CURL_API_KEY = None

@@ -50,8 +47,10 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     PROXY_CURL_API_KEY = PROXY_CURL_API_KEY or os.getenv("PROXY_CURL_API_KEY")

     if not PROXY_CURL_API_KEY:
-        logger.error("
-
+        logger.error("Proxycurl integration is not configured.")
+        raise ValueError(
+            "Proxycurl integration is not configured. Please configure the connection to Proxycurl in Integrations."
+        )

     return PROXY_CURL_API_KEY

@@ -78,23 +77,24 @@ async def enrich_person_info_from_proxycurl(
     """
     logger.info("Entering enrich_person_info_from_proxycurl")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
     }

-    if not linkedin_url
-        logger.warning("No linkedin_url
-        return {'error': "
+    if not linkedin_url:
+        logger.warning("No linkedin_url provided.")
+        return {'error': "linkedin_url must be provided"}

     # Check cache if linkedin_url is provided
     if linkedin_url:
         cached_response = retrieve_output("enrich_person_info_from_proxycurl", linkedin_url)
-        if cached_response is not None:
+        if cached_response is not None and cached_response.get('error') is None:
             logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
             return cached_response

@@ -103,10 +103,13 @@
         params['url'] = linkedin_url
     if email:
         params['email'] = email
+    else:
+        # Request Proxycurl to include personal emails when no email is provided
+        params['personal_email'] = 'include'
     if phone:
         params['phone'] = phone

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/profile'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -122,8 +125,6 @@
                 elif response.status == 404:
                     msg = "Person not found"
                     logger.warning(msg)
-                    if linkedin_url:
-                        cache_output("enrich_person_info_from_proxycurl", linkedin_url, {'error': msg})
                     return {'error': msg}
                 elif response.status == 429:
                     msg = "Rate limit exceeded"
@@ -166,9 +167,10 @@ async def lookup_person_in_proxy_curl_by_name(
         logger.warning("First name or last name missing for lookup.")
         return {'error': "Full name is required"}

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     headers = {'Authorization': f'Bearer {API_KEY}'}
     params = {
@@ -186,7 +188,7 @@
         logger.info(f"Cache hit for name lookup key: {key}")
         return cached_response

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/search/person'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -241,6 +243,13 @@ def transform_company_data(data: dict) -> dict:
         transformed["organization_website"] = data["website"]
     if "industry" in data:
         transformed["organization_industry"] = data["industry"]
+
+    if "company_size" in data:
+        transformed["company_size_list"] = data["company_size"]
+
+    if "company_size_on_linkedin" in data:
+        transformed["organization_size"] = data["company_size_on_linkedin"]
+        transformed["company_size"] = data["company_size_on_linkedin"]

     # Determine headquarters info from "hq" or "headquarters"
     hq_data = data.get("hq") or data.get("headquarters")
@@ -258,12 +267,52 @@ def transform_company_data(data: dict) -> dict:

     # Copy all other properties, excluding those already mapped
     for key, value in data.items():
-        if key not in ("name", "website", "industry", "hq", "headquarters"):
+        if key not in ("name", "website", "industry", "hq", "headquarters", "company_size"):
             transformed[key] = value

     return transformed


+def _build_company_profile_params(
+    company_url: str,
+    profile_flags: Dict[str, Optional[str]],
+) -> Dict[str, str]:
+    """
+    Build request params for the Enrichlayer company profile endpoint,
+    ensuring we only forward flags that were explicitly provided.
+    """
+    params: Dict[str, str] = {'url': company_url}
+    for key, value in profile_flags.items():
+        if value is not None:
+            params[key] = value
+    return params
+
+
+def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
+    """
+    Builds a cache key that is unique for the combination of identifier
+    (LinkedIn URL or domain) and the optional enrichment flags.
+    """
+    suffix_bits = [
+        f"{key}={value}"
+        for key, value in sorted(profile_flags.items())
+        if value is not None
+    ]
+    if suffix_bits:
+        return f"{identifier}|{'&'.join(suffix_bits)}"
+    return identifier
+
+
+def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
+    """
+    Convert a boolean flag into the string literals expected by Proxycurl.
+    True -> "include", False -> "exclude", None -> None (omit parameter).
+    """
+    if value is None:
+        return None
+    return "include" if value else "exclude"
+
+
 @backoff.on_exception(
     backoff.expo,
     aiohttp.ClientResponseError,
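Note: the three helpers above are pure functions, so their composition is easy to check in isolation. A minimal sketch (the company URL below is made up for illustration):

    # Illustrative values only, using the helpers added in this hunk.
    flags = {
        "funding_data": _bool_to_include_exclude(True),   # -> "include"
        "extra": _bool_to_include_exclude(False),         # -> "exclude"
        "categories": _bool_to_include_exclude(None),     # -> None, dropped from params
        "use_cache": "if-present",
    }
    params = _build_company_profile_params("https://www.linkedin.com/company/example-co/", flags)
    # {'url': 'https://www.linkedin.com/company/example-co/', 'funding_data': 'include',
    #  'extra': 'exclude', 'use_cache': 'if-present'}
    cache_key = _build_company_cache_key("https://www.linkedin.com/company/example-co/", flags)
    # 'https://www.linkedin.com/company/example-co/|extra=exclude&funding_data=include&use_cache=if-present'

Because the flags are folded into the cache key in sorted order, the same identifier requested with different enrichment flags no longer collides in the cache.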
@@ -274,10 +323,27 @@ def transform_company_data(data: dict) -> dict:
 async def enrich_organization_info_from_proxycurl(
     organization_domain: Optional[str] = None,
     organization_linkedin_url: Optional[str] = None,
-    tool_config: Optional[List[Dict]] = None
+    tool_config: Optional[List[Dict]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict:
     """
     Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
+    Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.
+
+    Args:
+        organization_domain: Organization's domain name to resolve via Proxycurl.
+        organization_linkedin_url: LinkedIn company profile URL.
+        tool_config: Optional tool configuration metadata for credential lookup.
+        categories/funding_data/exit_data/acquisitions/extra: Set True to request
+            "include", False for "exclude", or None to omit.
+        use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
+        fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").

     Returns:
         dict: Transformed JSON response containing organization information,
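Note: a hedged usage sketch of the widened signature (the URL and flag choices are illustrative; the boolean-to-"include"/"exclude" translation happens inside the function via _bool_to_include_exclude):

    org = await enrich_organization_info_from_proxycurl(
        organization_linkedin_url="https://www.linkedin.com/company/example-co/",  # made-up URL
        funding_data=True,    # sent as funding_data=include
        exit_data=False,      # sent as exit_data=exclude
        acquisitions=None,    # omitted from the request entirely
        tool_config=None,     # falls back to the PROXY_CURL_API_KEY env var
    )

Callers that pass no flags keep the old behavior, since use_cache and fallback_to_cache default to the previously hard-coded "if-present" / "on-error" values removed further down.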
@@ -285,9 +351,10 @@ async def enrich_organization_info_from_proxycurl(
     """
     logger.info("Entering enrich_organization_info_from_proxycurl")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
@@ -298,6 +365,16 @@
         logger.warning("No organization domain or LinkedIn URL provided.")
         return {}

+    profile_flags: Dict[str, Optional[str]] = {
+        "categories": _bool_to_include_exclude(categories),
+        "funding_data": _bool_to_include_exclude(funding_data),
+        "exit_data": _bool_to_include_exclude(exit_data),
+        "acquisitions": _bool_to_include_exclude(acquisitions),
+        "extra": _bool_to_include_exclude(extra),
+        "use_cache": use_cache,
+        "fallback_to_cache": fallback_to_cache,
+    }
+
     # If LinkedIn URL is provided, standardize it and fetch data
     if organization_linkedin_url:
         logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
@@ -320,19 +397,17 @@
         if standardized_url and not standardized_url.endswith('/'):
             standardized_url += '/'

+        cache_key = _build_company_cache_key(standardized_url, profile_flags)
         # Check cache for standardized LinkedIn URL
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl",
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
+            cached_response = transform_company_data(cached_response)
             return cached_response

         # Fetch details using standardized LinkedIn URL
-        url = 'https://
-        params =
-            'url': standardized_url,
-            'use_cache': 'if-present',
-            'fallback_to_cache': 'on-error',
-        }
+        url = 'https://enrichlayer.com/api/v2/company'
+        params = _build_company_profile_params(standardized_url, profile_flags)
         logger.debug(f"Making request to Proxycurl with params: {params}")

         async with aiohttp.ClientSession() as session:
@@ -342,7 +417,7 @@
                     if response.status == 200:
                         result = await response.json()
                         transformed_result = transform_company_data(result)
-                        cache_output("enrich_organization_info_from_proxycurl",
+                        cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
                         logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                         return transformed_result
                     elif response.status == 429:
@@ -350,9 +425,20 @@
                         logger.warning(msg)
                         await asyncio.sleep(30)
                         return {"error": msg}
+                    elif response.status == 404:
+                        error_text = await response.text()
+                        logger.warning(
+                            f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
+                        )
+                        cache_output(
+                            "enrich_organization_info_from_proxycurl", cache_key, {}
+                        )
+                        return {}
                     else:
                         error_text = await response.text()
-                        logger.error(
+                        logger.error(
+                            f"Error from Proxycurl organization info fetch by URL: {error_text}"
+                        )
                         return {}
         except Exception as e:
             logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
@@ -361,12 +447,13 @@
     # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
     if organization_domain:
         logger.debug(f"Organization domain provided: {organization_domain}")
-
+        domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization domain: {organization_domain}")
             return cached_response

-        resolve_url = 'https://
+        resolve_url = 'https://enrichlayer.com/api/v2/company/resolve'
         params = {'domain': organization_domain}
         logger.debug(f"Making request to Proxycurl to resolve domain with params: {params}")

@@ -390,14 +477,15 @@
                    else:
                        standardized_url = company_url

-                   profile_url = 'https://
+                   profile_url = 'https://enrichlayer.com/api/v2/company'
                    try:
-
+                       profile_params = _build_company_profile_params(standardized_url, profile_flags)
+                       async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
                            logger.debug(f"Received profile response status: {profile_response.status}")
                            if profile_response.status == 200:
                                result = await profile_response.json()
                                transformed_result = transform_company_data(result)
-                               cache_output("enrich_organization_info_from_proxycurl",
+                               cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
                                logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                                return transformed_result
                            elif profile_response.status == 429:
@@ -423,7 +511,7 @@
                elif response.status == 404:
                    msg = "Item not found"
                    logger.warning(msg)
-                   cache_output("enrich_organization_info_from_proxycurl",
+                   cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
                    return {}
                else:
                    error_text = await response.text()
@@ -456,9 +544,10 @@ async def enrich_job_info_from_proxycurl(
     """
     logger.info("Entering enrich_job_info_from_proxycurl")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
@@ -476,7 +565,7 @@
         return cached_response

     params = {'url': job_url}
-    api_endpoint = 'https://
+    api_endpoint = 'https://enrichlayer.com/api/v2/job'
     logger.debug(f"Making request to Proxycurl for job info with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -529,9 +618,10 @@ async def search_recent_job_changes(
     """
     logger.info("Entering search_recent_job_changes")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
         return []

     HEADERS = {
@@ -539,7 +629,7 @@
         'Content-Type': 'application/json'
     }

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/search/person'
    results = []
    page = 1
    per_page = min(max_items_to_return, 100)
@@ -582,7 +672,7 @@
                    error_text = await response.text()
                    logger.error(f"Error while searching recent job changes: {error_text}")
                    break
-        except Exception
+        except Exception:
            logger.exception("Exception occurred while searching recent job changes.")
            break

@@ -628,11 +718,11 @@ async def find_matching_job_posting_proxy_curl(
    logger.debug(f"Google search query: {query}")

    # First Google search attempt
-    results = await
+    results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
    if not isinstance(results, list) or len(results) == 0:
        logger.info("No results found. Attempting fallback query without optional keywords.")
        query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str}'
-        results = await
+        results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
        if not isinstance(results, list) or len(results) == 0:
            logger.info("No job postings found in fallback search either.")
            return job_posting_links
@@ -698,32 +788,34 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
        # Checks for None, empty string, or string with only whitespace
        return value is None or (isinstance(value, str) and not value.strip())

-    # Email
+    # Email - use first personal email if input is empty
    if is_empty(input_user_properties.get("email")):
-
+        personal_emails = person_data.get("personal_emails")
+        if isinstance(personal_emails, list) and personal_emails:
+            input_user_properties["email"] = personal_emails[0]

    # Phone
    if is_empty(input_user_properties.get("phone")):
        input_user_properties["phone"] = person_data.get("contact", {}).get("sanitized_phone", "")

    # Full name
-    if
+    if person_data.get("full_name"):
        input_user_properties["full_name"] = person_data["full_name"]

    # First name
-    if
+    if person_data.get("first_name"):
        input_user_properties["first_name"] = person_data["first_name"]

    # Last name
-    if
+    if person_data.get("last_name"):
        input_user_properties["last_name"] = person_data["last_name"]

    # Occupation -> job_title
-    if
+    if person_data.get("occupation"):
        input_user_properties["job_title"] = person_data["occupation"]

    # Headline
-    if
+    if person_data.get("headline"):
        input_user_properties["headline"] = person_data["headline"]

    # Summary
@@ -734,8 +826,8 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
    experiences = person_data.get("experiences", [])
    if experiences:
        # Current role data
-
-
+
+        input_user_properties["organization_name"] = experiences[0].get("company", "")

        org_url = experiences[0].get("company_linkedin_profile_url", "")
        if org_url and is_empty(input_user_properties.get("organization_linkedin_url")):
@@ -752,22 +844,39 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
        if is_empty(input_user_properties.get("previous_organization_name")):
            input_user_properties["previous_organization_name"] = previous_org.get("company", "")

-    # Combine city/state if available (and if lead_location is empty)
+    # Combine city/state if available (and if lead_location is empty); avoid literal "None"
    if is_empty(input_user_properties.get("lead_location")):
-
-
-
+        city = person_data.get("city")
+        state = person_data.get("state")
+        parts = []
+        for value in (city, state):
+            if value is None:
+                continue
+            s = str(value).strip()
+            if not s or s.lower() == "none":
+                continue
+            parts.append(s)
+        if parts:
+            input_user_properties["lead_location"] = ", ".join(parts)
+
+    # LinkedIn Followers Count
+    if is_empty(input_user_properties.get("linkedin_follower_count")):
+        input_user_properties["linkedin_follower_count"] = person_data.get("follower_count", 0)

    return input_user_properties


+
 async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_config: Optional[List[Dict]] = None) -> dict:
    """
    Enriches the user info (input_user_properties) with data from Proxycurl.
+    If the user_linkedin_url is determined to be a proxy (acw* and length > 10),
+    we skip calling enrich_person_info_from_proxycurl, keep the input as-is,
+    and only perform the organization enrichment logic.

    Returns:
-        dict: Updated input_user_properties with enriched data
-
+        dict: Updated input_user_properties with enriched data or
+        with an error field if something goes wrong.
    """
    logger.info("Entering enrich_user_info_with_proxy_curl")

@@ -781,7 +890,61 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

    logger.debug(f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'")

-    #
+    # ---------------------------------------------------------------
+    # 1) Detect if the LinkedIn URL is a "proxy" URL (acw + length > 10)
+    # ---------------------------------------------------------------
+    def is_proxy_linkedin_url(url: str) -> bool:
+        """
+        Checks if the LinkedIn URL has an /in/<profile_id> path
+        that starts with 'acw' and has length > 10, indicating a proxy.
+        """
+        match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
+        if match:
+            profile_id = match.group(1)
+            if profile_id.startswith("acw") and len(profile_id) > 10:
+                return True
+        return False
+
+    if is_proxy_linkedin_url(linkedin_url):
+        logger.info("The LinkedIn URL appears to be a proxy URL. Skipping user data enrichment from Proxycurl.")
+        # We do NOT call enrich_person_info_from_proxycurl for user data.
+        # We just set linkedin_url_match = False and enrich organization info if possible:
+        input_user_properties["linkedin_url_match"] = False
+
+        # Attempt organization enrichment if we have an organization_linkedin_url:
+        company_data = {}
+        if input_user_properties.get("organization_linkedin_url"):
+            company_data = await enrich_organization_info_from_proxycurl(
+                organization_linkedin_url=input_user_properties["organization_linkedin_url"],
+                tool_config=tool_config
+            )
+            if company_data and not company_data.get("error"):
+                if company_data.get("organization_linkedin_url"):
+                    input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+                if company_data.get("organization_name"):
+                    input_user_properties["organization_name"] = company_data.get("organization_name", "")
+                input_user_properties["organization_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["company_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+                input_user_properties["industry"] = company_data.get("organization_industry", "")
+                input_user_properties["organization_revenue"] = ""
+
+            # Always clean & store any returned org info:
+            additional_props = input_user_properties.get("additional_properties") or {}
+            company_data = cleanup_properties(company_data)
+            additional_props["pc_company_data"] = json.dumps(company_data)
+            input_user_properties["additional_properties"] = additional_props
+
+        logger.info("Returning after skipping user enrichment for proxy URL.")
+        return input_user_properties
+
+    # ----------------------------------------------------------------
+    # 2) If not proxy, proceed with normal user enrichment logic
+    # ----------------------------------------------------------------
    if linkedin_url or email:
        user_data = await enrich_person_info_from_proxycurl(
            linkedin_url=linkedin_url,
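Note: a quick sanity sketch of the proxy-URL rule added above (the profile ids are invented; is_proxy_linkedin_url is nested inside the function, so this is inlined here for illustration only):

    assert is_proxy_linkedin_url("https://www.linkedin.com/in/acwb1c2d3e4f5")  # 'acw' prefix, id longer than 10 chars
    assert not is_proxy_linkedin_url("https://www.linkedin.com/in/janedoe")    # ordinary public slug
    assert not is_proxy_linkedin_url("https://www.linkedin.com/in/acw1234")    # 'acw' prefix but id too short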
@@ -858,7 +1021,9 @@
        input_user_properties["linkedin_url_match"] = False
        return input_user_properties

-    #
+    # ------------------------------------------------------------------
+    # 3) If user data was found, sanitize & fill user properties
+    # ------------------------------------------------------------------
    url_pattern = re.compile(r'(https?://[^\s]+)', re.IGNORECASE)

    def sanitize_urls_in_data(data):
@@ -900,11 +1065,13 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

    input_user_properties = fill_in_missing_properties(input_user_properties, person_data)

-    #
+    # ------------------------------------------------------------------
+    # 4) Attempt organization enrichment if we have an org LinkedIn URL
+    # ------------------------------------------------------------------
    company_data = {}
    if input_user_properties.get("organization_linkedin_url"):
        company_data = await enrich_organization_info_from_proxycurl(
-            organization_linkedin_url=input_user_properties
+            organization_linkedin_url=input_user_properties["organization_linkedin_url"],
            tool_config=tool_config
        )
        if company_data and not company_data.get("error"):
@@ -912,8 +1079,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
                input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
            if company_data.get("organization_name"):
                input_user_properties["organization_name"] = company_data.get("organization_name", "")
-            input_user_properties["organization_size"] =
-
+            input_user_properties["organization_size"] = str(
+                company_data.get("company_size_on_linkedin", "")
+            )
+            input_user_properties["company_size"] = str(
+                company_data.get("company_size_on_linkedin", "")
+            )
+            input_user_properties["company_size_list"] = company_data.get("company_size", "")
+            input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+            input_user_properties["industry"] = company_data.get("organization_industry", "")
            input_user_properties["organization_revenue"] = ""

        person_data = cleanup_properties(person_data)
@@ -925,3 +1099,130 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

    logger.info("Enrichment of user info with Proxycurl complete.")
    return input_user_properties
+
+
+
+
+
+@assistant_tool
+async def find_leads_by_job_openings_proxy_curl(
+    query_params: Dict[str, Any],
+    hiring_manager_roles: List[str],
+    tool_config: Optional[List[Dict]] = None,
+) -> List[Dict]:
+    """Search LinkedIn job postings using Proxycurl and find hiring manager leads.
+
+    Args:
+        query_params: Dictionary of parameters to Proxycurl job search API. The
+            key ``job_title`` is required. Other keys like ``location`` may also
+            be supplied.
+        hiring_manager_roles: List of job titles to lookup at the company for
+            potential hiring managers.
+        tool_config: Optional configuration containing Proxycurl credentials.
+
+    Returns:
+        A list of lead dictionaries with normalized keys such as
+        ``first_name``, ``last_name``, ``user_linkedin_url``,
+        ``organization_name``, and ``organization_linkedin_url``.
+    """
+    logger.info("Entering find_leads_by_job_openings_proxy_curl")
+
+    if not isinstance(query_params, dict) or not query_params.get("job_title"):
+        logger.warning("query_params must include 'job_title'")
+        return []
+
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    job_search_url = "https://enrichlayer.com/api/v2/company/job"
+    leads: List[Dict] = []
+
+    # ------------------------------------------------------------------
+    # 1) Look up job openings
+    # ------------------------------------------------------------------
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(job_search_url, headers=headers, params=query_params) as resp:
+                if resp.status == 200:
+                    job_result = await resp.json()
+                    jobs = job_result.get("results") or job_result.get("jobs") or []
+                elif resp.status == 429:
+                    logger.warning("Rate limit exceeded on job search")
+                    await asyncio.sleep(30)
+                    return []
+                else:
+                    error_text = await resp.text()
+                    logger.error("Job search error %s: %s", resp.status, error_text)
+                    return []
+    except Exception:
+        logger.exception("Exception while searching jobs on Proxycurl")
+        return []
+
+    # ------------------------------------------------------------------
+    # 2) For each job, find leads for specified hiring manager roles
+    # ------------------------------------------------------------------
+    for job in jobs:
+        company = job.get("company", {}) if isinstance(job, dict) else {}
+        company_name = company.get("name", "")
+        company_url = company.get("url", "")
+        if not company_name:
+            continue
+
+        for role in hiring_manager_roles:
+            employee_params = {
+                "url": company_url,
+                "role_search": role,
+                "employment_status": "current",
+                "page_size": 1,
+            }
+            employees = []
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(
+                        "https://enrichlayer.com/api/v2/company/employees",
+                        headers=headers,
+                        params=employee_params,
+                    ) as e_resp:
+                        if e_resp.status == 200:
+                            data = await e_resp.json()
+                            employees = data.get("employees") or data.get("profiles") or []
+                        elif e_resp.status == 429:
+                            logger.warning("Rate limit exceeded while fetching employees")
+                            await asyncio.sleep(30)
+                            continue
+            except Exception:
+                logger.exception("Exception while fetching employees from Proxycurl")
+                continue
+
+            for emp in employees:
+                profile_url = emp.get("linkedin_profile_url") or emp.get("profile_url")
+                if not profile_url:
+                    continue
+                person = await enrich_person_info_from_proxycurl(
+                    linkedin_url=profile_url, tool_config=tool_config
+                )
+                if not person or person.get("error"):
+                    continue
+                lead = {
+                    "first_name": person.get("first_name", ""),
+                    "last_name": person.get("last_name", ""),
+                    "full_name": person.get("full_name", ""),
+                    "user_linkedin_url": profile_url,
+                    "job_title": person.get("occupation", role),
+                    "organization_name": company_name,
+                    "organization_linkedin_url": company_url,
+                }
+                cleaned = cleanup_properties(lead)
+                if cleaned:
+                    leads.append(cleaned)
+
+    logger.info("Returning %d leads from Proxycurl job search", len(leads))
+    return leads