dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. dhisana/schemas/common.py +33 -0
  2. dhisana/schemas/sales.py +224 -23
  3. dhisana/utils/add_mapping.py +72 -63
  4. dhisana/utils/apollo_tools.py +739 -109
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/cache_output_tools.py +23 -23
  7. dhisana/utils/check_email_validity_tools.py +456 -458
  8. dhisana/utils/check_for_intent_signal.py +1 -2
  9. dhisana/utils/check_linkedin_url_validity.py +34 -8
  10. dhisana/utils/clay_tools.py +3 -2
  11. dhisana/utils/clean_properties.py +3 -1
  12. dhisana/utils/compose_salesnav_query.py +0 -1
  13. dhisana/utils/compose_search_query.py +7 -3
  14. dhisana/utils/composite_tools.py +0 -1
  15. dhisana/utils/dataframe_tools.py +2 -2
  16. dhisana/utils/email_body_utils.py +72 -0
  17. dhisana/utils/email_provider.py +375 -0
  18. dhisana/utils/enrich_lead_information.py +585 -85
  19. dhisana/utils/fetch_openai_config.py +129 -0
  20. dhisana/utils/field_validators.py +1 -1
  21. dhisana/utils/g2_tools.py +0 -1
  22. dhisana/utils/generate_content.py +0 -1
  23. dhisana/utils/generate_email.py +69 -16
  24. dhisana/utils/generate_email_response.py +298 -41
  25. dhisana/utils/generate_flow.py +0 -1
  26. dhisana/utils/generate_linkedin_connect_message.py +19 -6
  27. dhisana/utils/generate_linkedin_response_message.py +156 -65
  28. dhisana/utils/generate_structured_output_internal.py +351 -131
  29. dhisana/utils/google_custom_search.py +150 -44
  30. dhisana/utils/google_oauth_tools.py +721 -0
  31. dhisana/utils/google_workspace_tools.py +391 -25
  32. dhisana/utils/hubspot_clearbit.py +3 -1
  33. dhisana/utils/hubspot_crm_tools.py +771 -167
  34. dhisana/utils/instantly_tools.py +3 -1
  35. dhisana/utils/lusha_tools.py +10 -7
  36. dhisana/utils/mailgun_tools.py +150 -0
  37. dhisana/utils/microsoft365_tools.py +447 -0
  38. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  39. dhisana/utils/openai_helpers.py +19 -16
  40. dhisana/utils/parse_linkedin_messages_txt.py +2 -3
  41. dhisana/utils/profile.py +37 -0
  42. dhisana/utils/proxy_curl_tools.py +507 -206
  43. dhisana/utils/proxycurl_search_leads.py +426 -0
  44. dhisana/utils/research_lead.py +121 -68
  45. dhisana/utils/sales_navigator_crawler.py +1 -6
  46. dhisana/utils/salesforce_crm_tools.py +323 -50
  47. dhisana/utils/search_router.py +131 -0
  48. dhisana/utils/search_router_jobs.py +51 -0
  49. dhisana/utils/sendgrid_tools.py +126 -91
  50. dhisana/utils/serarch_router_local_business.py +75 -0
  51. dhisana/utils/serpapi_additional_tools.py +290 -0
  52. dhisana/utils/serpapi_google_jobs.py +117 -0
  53. dhisana/utils/serpapi_google_search.py +188 -0
  54. dhisana/utils/serpapi_local_business_search.py +129 -0
  55. dhisana/utils/serpapi_search_tools.py +363 -432
  56. dhisana/utils/serperdev_google_jobs.py +125 -0
  57. dhisana/utils/serperdev_local_business.py +154 -0
  58. dhisana/utils/serperdev_search.py +233 -0
  59. dhisana/utils/smtp_email_tools.py +576 -0
  60. dhisana/utils/test_connect.py +1765 -92
  61. dhisana/utils/trasform_json.py +95 -16
  62. dhisana/utils/web_download_parse_tools.py +0 -1
  63. dhisana/utils/zoominfo_tools.py +2 -3
  64. dhisana/workflow/test.py +1 -1
  65. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
  66. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  67. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  68. dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
  69. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  70. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
@@ -5,15 +5,12 @@ import os
  import re
  import aiohttp
  import backoff
- from typing import Dict, List, Optional
+ from typing import Any, Dict, List, Optional
 
- from bs4 import BeautifulSoup
  from dhisana.utils.assistant_tool_tag import assistant_tool
  from dhisana.utils.cache_output_tools import cache_output, retrieve_output
  from dhisana.utils.clean_properties import cleanup_properties
- from dhisana.utils.domain_parser import get_domain_from_website, is_excluded_domain
- from dhisana.utils.serpapi_search_tools import search_google
- from dhisana.utils.web_download_parse_tools import get_html_content_from_url
+ from dhisana.utils.search_router import search_google_with_tools
  from urllib.parse import urlparse, urlunparse
 
  logging.basicConfig(level=logging.INFO)
@@ -24,16 +21,8 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
  """
  Retrieves the PROXY_CURL_API_KEY access token from the provided tool configuration.
 
- Args:
- tool_config (list): A list of dictionaries containing the tool configuration.
- Each dictionary should have a "name" key and a "configuration" key,
- where "configuration" is a list of dictionaries containing "name" and "value" keys.
-
- Returns:
- str: The PROXY_CURL_API_KEY access token.
-
  Raises:
- ValueError: If the access token is not found in the tool configuration or environment variable.
+ ValueError: If the Proxycurl integration has not been configured.
  """
  PROXY_CURL_API_KEY = None
 
@@ -58,8 +47,11 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
  PROXY_CURL_API_KEY = PROXY_CURL_API_KEY or os.getenv("PROXY_CURL_API_KEY")
 
  if not PROXY_CURL_API_KEY:
- logger.error("PROXY_CURL_API_KEY not found in configuration or environment.")
- raise ValueError("PROXY_CURL_API_KEY access token not found in tool_config or environment variable")
+ logger.error("Proxycurl integration is not configured.")
+ raise ValueError(
+ "Proxycurl integration is not configured. Please configure the connection to Proxycurl in Integrations."
+ )
+
  return PROXY_CURL_API_KEY
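
Note: the Args text removed above described the expected tool_config shape (a "name" key plus a "configuration" list of name/value pairs). A minimal sketch of a matching structure; the integration name and key name below are illustrative, since the lookup logic itself is outside this hunk:

tool_config = [
    {
        "name": "proxycurl",  # hypothetical integration name
        "configuration": [
            {"name": "apiKey", "value": "<PROXY_CURL_API_KEY>"},  # hypothetical key name
        ],
    }
]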
 
 
@@ -76,34 +68,33 @@ async def enrich_person_info_from_proxycurl(
  email: Optional[str] = None,
  phone: Optional[str] = None,
  tool_config: Optional[List[Dict]] = None
- ):
+ ) -> Dict:
  """
  Fetch a person's details from Proxycurl using LinkedIn URL, email, or phone number.
 
- Parameters:
- - linkedin_url (str, optional): LinkedIn profile URL of the person.
- - email (str, optional): Email address of the person.
- - phone (str, optional): Phone number of the person.
-
  Returns:
- - dict: JSON response containing person information.
+ dict: JSON response containing person information or an error.
  """
  logger.info("Entering enrich_person_info_from_proxycurl")
 
- API_KEY = get_proxycurl_access_token(tool_config)
+ try:
+ API_KEY = get_proxycurl_access_token(tool_config)
+ except ValueError as e:
+ return {"error": str(e)}
+
  HEADERS = {
  'Authorization': f'Bearer {API_KEY}',
  'Content-Type': 'application/json'
  }
 
- if not linkedin_url and not email and not phone:
- logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
- return {'error': "At least one of linkedin_url, email, or phone must be provided"}
-
+ if not linkedin_url:
+ logger.warning("No linkedin_url provided.")
+ return {'error': "linkedin_url must be provided"}
+
  # Check cache if linkedin_url is provided
  if linkedin_url:
  cached_response = retrieve_output("enrich_person_info_from_proxycurl", linkedin_url)
- if cached_response is not None:
+ if cached_response is not None and cached_response.get('error') is None:
  logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
  return cached_response
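
Since this function now reports failures as dicts instead of raising, callers are expected to check the 'error' key; note also that error payloads are no longer written to the cache, and stale cached errors are skipped on read. A minimal usage sketch (the profile URL is illustrative, tool_config as sketched above):

person = await enrich_person_info_from_proxycurl(
    linkedin_url="https://www.linkedin.com/in/some-profile/",
    tool_config=tool_config,
)
if person.get("error"):
    logger.warning("Enrichment failed: %s", person["error"])
else:
    logger.info("Enriched %s", person.get("full_name"))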
 
@@ -112,10 +103,13 @@ async def enrich_person_info_from_proxycurl(
  params['url'] = linkedin_url
  if email:
  params['email'] = email
+ else:
+ # Request Proxycurl to include personal emails when no email is provided
+ params['personal_email'] = 'include'
  if phone:
  params['phone'] = phone
 
- url = 'https://nubela.co/proxycurl/api/v2/linkedin'
+ url = 'https://enrichlayer.com/api/v2/profile'
  logger.debug(f"Making request to Proxycurl with params: {params}")
 
  async with aiohttp.ClientSession() as session:
@@ -131,27 +125,21 @@
  elif response.status == 404:
  msg = "Person not found"
  logger.warning(msg)
- if linkedin_url:
- cache_output("enrich_person_info_from_proxycurl", linkedin_url, {'error': msg})
  return {'error': msg}
  elif response.status == 429:
  msg = "Rate limit exceeded"
  logger.warning(msg)
+ # Sleep and then return an error (no raise)
  await asyncio.sleep(30)
- raise aiohttp.ClientResponseError(
- request_info=response.request_info,
- history=response.history,
- status=response.status,
- message=msg,
- headers=response.headers
- )
+ return {'error': msg}
  else:
  error_text = await response.text()
  logger.error(f"Error from Proxycurl: {error_text}")
  return {'error': error_text}
  except Exception as e:
  logger.exception("Exception occurred while fetching person info from Proxycurl.")
- raise e
+ return {"error": str(e)}
+
 
  @assistant_tool
  @backoff.on_exception(
@@ -166,14 +154,24 @@ async def lookup_person_in_proxy_curl_by_name(
  last_name: str,
  company_name: Optional[str] = None,
  tool_config: Optional[List[Dict]] = None,
- ):
+ ) -> Dict:
+ """
+ Look up a person in Proxycurl by first and last name, optionally a company name.
+
+ Returns:
+ dict: JSON response containing search results or an error.
+ """
  logger.info("Entering lookup_person_in_proxy_curl_by_name")
 
  if not first_name or not last_name:
  logger.warning("First name or last name missing for lookup.")
  return {'error': "Full name is required"}
 
- API_KEY = get_proxycurl_access_token(tool_config)
+ try:
+ API_KEY = get_proxycurl_access_token(tool_config)
+ except ValueError as e:
+ return {"error": str(e)}
+
  headers = {'Authorization': f'Bearer {API_KEY}'}
  params = {
  'first_name': first_name,
@@ -190,7 +188,7 @@ async def lookup_person_in_proxy_curl_by_name(
  logger.info(f"Cache hit for name lookup key: {key}")
  return cached_response
 
- url = 'https://nubela.co/proxycurl/api/v2/search/person'
+ url = 'https://enrichlayer.com/api/v2/search/person'
  logger.debug(f"Making request to Proxycurl with params: {params}")
 
  async with aiohttp.ClientSession() as session:
@@ -212,43 +210,109 @@ async def lookup_person_in_proxy_curl_by_name(
  msg = "Rate limit exceeded"
  logger.warning(msg)
  await asyncio.sleep(30)
- raise aiohttp.ClientResponseError(
- request_info=response.request_info,
- history=response.history,
- status=response.status,
- message=msg,
- headers=response.headers
- )
+ return {'error': msg}
  else:
  result = await response.json()
  logger.warning(f"lookup_person_in_proxycurl_by_name error: {result}")
  return {'error': result}
  except Exception as e:
  logger.exception("Exception occurred while looking up person by name.")
- raise e
-
+ return {"error": str(e)}
 
 
  def transform_company_data(data: dict) -> dict:
  """
- Transform the company data by mapping 'name' to 'organization_name'
- and 'website' to 'organization_website', while copying over all other properties.
+ Transform the company data by mapping:
+ - 'name' to 'organization_name'
+ - 'website' to 'organization_website'
+ - 'industry' to 'organization_industry'
+ - 'hq' or 'headquarters' to 'organization_hq_location'
+ in the format "city, state, country" (skipping empty parts).
+ Copies over all other properties except the ones that are mapped.
  If data is empty, returns an empty dictionary.
  """
  if not data:
  return {}
+
  transformed = {}
- # Map specific keys
+
+ # Map name, website, and industry
  if "name" in data:
  transformed["organization_name"] = data["name"]
  if "website" in data:
  transformed["organization_website"] = data["website"]
- # Copy all other properties except the ones we mapped
+ if "industry" in data:
+ transformed["organization_industry"] = data["industry"]
+
+ if "company_size" in data:
+ transformed["company_size_list"] = data["company_size"]
+
+ if "company_size_on_linkedin" in data:
+ transformed["organization_size"] = data["company_size_on_linkedin"]
+ transformed["company_size"] = data["company_size_on_linkedin"]
+
+ # Determine headquarters info from "hq" or "headquarters"
+ hq_data = data.get("hq") or data.get("headquarters")
+ if hq_data:
+ if isinstance(hq_data, dict):
+ city = hq_data.get("city", "")
+ state = hq_data.get("geographic_area", "")
+ country = hq_data.get("country", "")
+ # Join non-empty parts with a comma and a space
+ parts = [part for part in (city, state, country) if part]
+ transformed["organization_hq_location"] = ", ".join(parts)
+ else:
+ # If hq_data is not a dict, assume it's already in the desired format
+ transformed["organization_hq_location"] = hq_data
+
+ # Copy all other properties, excluding those already mapped
  for key, value in data.items():
- if key not in ("name", "website"):
+ if key not in ("name", "website", "industry", "hq", "headquarters", "company_size"):
  transformed[key] = value
+
  return transformed
 
+
+ def _build_company_profile_params(
+ company_url: str,
+ profile_flags: Dict[str, Optional[str]],
+ ) -> Dict[str, str]:
+ """
+ Build request params for the Enrichlayer company profile endpoint,
+ ensuring we only forward flags that were explicitly provided.
+ """
+ params: Dict[str, str] = {'url': company_url}
+ for key, value in profile_flags.items():
+ if value is not None:
+ params[key] = value
+ return params
+
+
+ def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
+ """
+ Builds a cache key that is unique for the combination of identifier
+ (LinkedIn URL or domain) and the optional enrichment flags.
+ """
+ suffix_bits = [
+ f"{key}={value}"
+ for key, value in sorted(profile_flags.items())
+ if value is not None
+ ]
+ if suffix_bits:
+ return f"{identifier}|{'&'.join(suffix_bits)}"
+ return identifier
+
+
+ def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
+ """
+ Convert a boolean flag into the string literals expected by Proxycurl.
+ True -> "include", False -> "exclude", None -> None (omit parameter).
+ """
+ if value is None:
+ return None
+ return "include" if value else "exclude"
+
+
  @backoff.on_exception(
  backoff.expo,
  aiohttp.ClientResponseError,
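
Worked example of the three helpers added above (the company URL is illustrative):

flags = {
    "funding_data": _bool_to_include_exclude(True),   # -> "include"
    "categories": _bool_to_include_exclude(None),     # -> None, omitted below
    "use_cache": "if-present",
}
params = _build_company_profile_params("https://www.linkedin.com/company/acme/", flags)
# {'url': 'https://www.linkedin.com/company/acme/', 'funding_data': 'include', 'use_cache': 'if-present'}
key = _build_company_cache_key("https://www.linkedin.com/company/acme/", flags)
# 'https://www.linkedin.com/company/acme/|funding_data=include&use_cache=if-present'

Because the flags are baked into the cache key, requests that ask for different optional sections no longer collide in the cache.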
@@ -259,22 +323,39 @@ def transform_company_data(data: dict) -> dict:
  async def enrich_organization_info_from_proxycurl(
  organization_domain: Optional[str] = None,
  organization_linkedin_url: Optional[str] = None,
- tool_config: Optional[List[Dict]] = None
+ tool_config: Optional[List[Dict]] = None,
+ categories: Optional[bool] = None,
+ funding_data: Optional[bool] = None,
+ exit_data: Optional[bool] = None,
+ acquisitions: Optional[bool] = None,
+ extra: Optional[bool] = None,
+ use_cache: Optional[str] = "if-present",
+ fallback_to_cache: Optional[str] = "on-error",
  ) -> Dict:
  """
  Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
+ Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.
 
- Parameters:
- - organization_domain (str, optional): Domain of the organization.
- - organization_linkedin_url (str, optional): LinkedIn URL of the organization.
+ Args:
+ organization_domain: Organization's domain name to resolve via Proxycurl.
+ organization_linkedin_url: LinkedIn company profile URL.
+ tool_config: Optional tool configuration metadata for credential lookup.
+ categories/funding_data/exit_data/acquisitions/extra: Set True to request
+ "include", False for "exclude", or None to omit.
+ use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
+ fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").
 
  Returns:
- - dict: Transformed JSON response containing organization information.
- Returns an empty dict if nothing is found.
+ dict: Transformed JSON response containing organization information,
+ or {'error': ...} on error, or empty dict if not found.
  """
  logger.info("Entering enrich_organization_info_from_proxycurl")
 
- API_KEY = get_proxycurl_access_token(tool_config)
+ try:
+ API_KEY = get_proxycurl_access_token(tool_config)
+ except ValueError as e:
+ return {"error": str(e)}
+
  HEADERS = {
  'Authorization': f'Bearer {API_KEY}',
  'Content-Type': 'application/json'
@@ -284,9 +365,22 @@ async def enrich_organization_info_from_proxycurl(
  logger.warning("No organization domain or LinkedIn URL provided.")
  return {}
 
+ profile_flags: Dict[str, Optional[str]] = {
+ "categories": _bool_to_include_exclude(categories),
+ "funding_data": _bool_to_include_exclude(funding_data),
+ "exit_data": _bool_to_include_exclude(exit_data),
+ "acquisitions": _bool_to_include_exclude(acquisitions),
+ "extra": _bool_to_include_exclude(extra),
+ "use_cache": use_cache,
+ "fallback_to_cache": fallback_to_cache,
+ }
+
  # If LinkedIn URL is provided, standardize it and fetch data
  if organization_linkedin_url:
  logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
+ if "linkedin.com/company" not in organization_linkedin_url:
+ logger.warning("Invalid LinkedIn URL provided." + organization_linkedin_url)
+ return {}
  parsed_url = urlparse(organization_linkedin_url)
  if parsed_url.netloc != 'www.linkedin.com':
  standardized_netloc = 'www.linkedin.com'
@@ -303,19 +397,17 @@ async def enrich_organization_info_from_proxycurl(
  if standardized_url and not standardized_url.endswith('/'):
  standardized_url += '/'
 
+ cache_key = _build_company_cache_key(standardized_url, profile_flags)
  # Check cache for standardized LinkedIn URL
- cached_response = retrieve_output("enrich_organization_info_from_proxycurl", standardized_url)
+ cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
  if cached_response is not None:
  logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
+ cached_response = transform_company_data(cached_response)
  return cached_response
 
  # Fetch details using standardized LinkedIn URL
- url = 'https://nubela.co/proxycurl/api/linkedin/company'
- params = {
- 'url': standardized_url,
- 'use_cache': 'if-present',
- 'fallback_to_cache': 'on-error',
- }
+ url = 'https://enrichlayer.com/api/v2/company'
+ params = _build_company_profile_params(standardized_url, profile_flags)
  logger.debug(f"Making request to Proxycurl with params: {params}")
 
  async with aiohttp.ClientSession() as session:
@@ -325,26 +417,43 @@ async def enrich_organization_info_from_proxycurl(
  if response.status == 200:
  result = await response.json()
  transformed_result = transform_company_data(result)
- cache_output("enrich_organization_info_from_proxycurl", standardized_url, transformed_result)
+ cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
  logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
  return transformed_result
+ elif response.status == 429:
+ msg = "Rate limit exceeded"
+ logger.warning(msg)
+ await asyncio.sleep(30)
+ return {"error": msg}
+ elif response.status == 404:
+ error_text = await response.text()
+ logger.warning(
+ f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
+ )
+ cache_output(
+ "enrich_organization_info_from_proxycurl", cache_key, {}
+ )
+ return {}
  else:
  error_text = await response.text()
- logger.error(f"Error from Proxycurl organization info fetch by URL: {error_text}")
+ logger.error(
+ f"Error from Proxycurl organization info fetch by URL: {error_text}"
+ )
  return {}
  except Exception as e:
  logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
- raise e
+ return {"error": str(e)}
 
  # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
  if organization_domain:
  logger.debug(f"Organization domain provided: {organization_domain}")
- cached_response = retrieve_output("enrich_organization_info_from_proxycurl", organization_domain)
+ domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
+ cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
  if cached_response is not None:
  logger.info(f"Cache hit for organization domain: {organization_domain}")
  return cached_response
 
- resolve_url = 'https://nubela.co/proxycurl/api/linkedin/company/resolve'
+ resolve_url = 'https://enrichlayer.com/api/v2/company/resolve'
  params = {'domain': organization_domain}
  logger.debug(f"Making request to Proxycurl to resolve domain with params: {params}")
 
@@ -368,23 +477,29 @@ async def enrich_organization_info_from_proxycurl(
  else:
  standardized_url = company_url
 
- profile_url = 'https://nubela.co/proxycurl/api/v2/linkedin/company'
+ profile_url = 'https://enrichlayer.com/api/v2/company'
  try:
- async with session.get(profile_url, headers=HEADERS, params={'url': standardized_url}) as profile_response:
+ profile_params = _build_company_profile_params(standardized_url, profile_flags)
+ async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
  logger.debug(f"Received profile response status: {profile_response.status}")
  if profile_response.status == 200:
  result = await profile_response.json()
  transformed_result = transform_company_data(result)
- cache_output("enrich_organization_info_from_proxycurl", organization_domain, transformed_result)
+ cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
  logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
  return transformed_result
+ elif profile_response.status == 429:
+ msg = "Rate limit exceeded"
+ logger.warning(msg)
+ await asyncio.sleep(30)
+ return {"error": msg}
  else:
  error_text = await profile_response.text()
  logger.error(f"Error from Proxycurl organization profile fetch by resolved domain: {error_text}")
  return {}
  except Exception as e:
  logger.exception("Exception occurred while fetching organization profile data.")
- raise e
+ return {"error": str(e)}
  else:
  logger.warning("Company URL not found for the provided domain.")
  return {}
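
A usage sketch for the extended signature (the company URL is illustrative). True/False flags are sent as "include"/"exclude"; None omits the parameter:

org = await enrich_organization_info_from_proxycurl(
    organization_linkedin_url="https://www.linkedin.com/company/acme/",
    funding_data=True,        # sent as funding_data=include
    acquisitions=False,       # sent as acquisitions=exclude
    tool_config=tool_config,
)
if org.get("error"):
    pass  # missing credentials, rate limit, or request failure
elif not org:
    pass  # not found; 404s are cached as empty dicts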
@@ -392,17 +507,11 @@ async def enrich_organization_info_from_proxycurl(
  msg = "Rate limit exceeded"
  logger.warning(msg)
  await asyncio.sleep(30)
- raise aiohttp.ClientResponseError(
- request_info=response.request_info,
- history=response.history,
- status=response.status,
- message=msg,
- headers=response.headers
- )
+ return {"error": msg}
  elif response.status == 404:
  msg = "Item not found"
  logger.warning(msg)
- cache_output("enrich_organization_info_from_proxycurl", organization_domain, {})
+ cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
  return {}
  else:
  error_text = await response.text()
@@ -410,7 +519,10 @@ async def enrich_organization_info_from_proxycurl(
  return {}
  except Exception as e:
  logger.exception("Exception occurred while resolving organization domain on Proxycurl.")
- raise e
+ return {"error": str(e)}
+
+ return {}
+
 
  @assistant_tool
  @backoff.on_exception(
@@ -423,19 +535,20 @@ async def enrich_organization_info_from_proxycurl(
  async def enrich_job_info_from_proxycurl(
  job_url: Optional[str] = None,
  tool_config: Optional[List[Dict]] = None
- ):
+ ) -> Dict:
  """
  Fetch a job's details from Proxycurl using the job URL.
 
- Parameters:
- - job_url (str, optional): URL of the LinkedIn job posting.
-
  Returns:
- - dict: JSON response containing job information.
+ dict: JSON response containing job information or error.
  """
  logger.info("Entering enrich_job_info_from_proxycurl")
 
- API_KEY = get_proxycurl_access_token(tool_config)
+ try:
+ API_KEY = get_proxycurl_access_token(tool_config)
+ except ValueError as e:
+ return {"error": str(e)}
+
  HEADERS = {
  'Authorization': f'Bearer {API_KEY}',
  'Content-Type': 'application/json'
@@ -444,7 +557,7 @@ async def enrich_job_info_from_proxycurl(
  if not job_url:
  logger.warning("No job URL provided.")
  return {'error': "Job URL must be provided"}
-
+
  # Check cache
  cached_response = retrieve_output("enrich_job_info_from_proxycurl", job_url)
  if cached_response is not None:
@@ -452,7 +565,7 @@ async def enrich_job_info_from_proxycurl(
  return cached_response
 
  params = {'url': job_url}
- api_endpoint = 'https://nubela.co/proxycurl/api/linkedin/job'
+ api_endpoint = 'https://enrichlayer.com/api/v2/job'
  logger.debug(f"Making request to Proxycurl for job info with params: {params}")
 
  async with aiohttp.ClientSession() as session:
@@ -468,13 +581,7 @@ async def enrich_job_info_from_proxycurl(
  msg = "Rate limit exceeded"
  logger.warning(msg)
  await asyncio.sleep(30)
- raise aiohttp.ClientResponseError(
- request_info=response.request_info,
- history=response.history,
- status=response.status,
- message=msg,
- headers=response.headers
- )
+ return {'error': msg}
  elif response.status == 404:
  msg = "Job not found"
  logger.warning(msg)
@@ -486,7 +593,7 @@ async def enrich_job_info_from_proxycurl(
  return {'error': error_text}
  except Exception as e:
  logger.exception("Exception occurred while fetching job info from Proxycurl.")
- raise e
+ return {"error": str(e)}
 
 
  @assistant_tool
@@ -506,23 +613,23 @@ async def search_recent_job_changes(
  """
  Search for individuals with specified job titles and locations who have recently changed jobs.
 
- Parameters:
- - job_titles (List[str]): List of job titles to search for.
- - locations (List[str]): List of locations to search in.
- - max_items_to_return (int, optional): Maximum number of items to return. Defaults to 100.
-
  Returns:
- - List[dict]: List of individuals matching the criteria.
+ List[dict]: List of individuals matching the criteria, or empty list on failure/error.
  """
  logger.info("Entering search_recent_job_changes")
 
- API_KEY = get_proxycurl_access_token(tool_config)
+ try:
+ API_KEY = get_proxycurl_access_token(tool_config)
+ except ValueError as e:
+ logger.error(str(e))
+ return []
+
  HEADERS = {
  'Authorization': f'Bearer {API_KEY}',
  'Content-Type': 'application/json'
  }
 
- url = 'https://nubela.co/proxycurl/api/search/person'
+ url = 'https://enrichlayer.com/api/v2/search/person'
  results = []
  page = 1
  per_page = min(max_items_to_return, 100)
@@ -558,18 +665,14 @@ async def search_recent_job_changes(
  msg = "Rate limit exceeded"
  logger.warning(msg)
  await asyncio.sleep(30)
- raise aiohttp.ClientResponseError(
- request_info=response.request_info,
- history=response.history,
- status=response.status,
- message=msg,
- headers=response.headers
- )
+ # Without raising, won't trigger another backoff retry
+ # so just continue or break as desired:
+ continue
  else:
  error_text = await response.text()
  logger.error(f"Error while searching recent job changes: {error_text}")
  break
- except Exception as e:
+ except Exception:
  logger.exception("Exception occurred while searching recent job changes.")
  break
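
A usage sketch, with the parameter names the removed docstring documented (titles and locations are illustrative):

people = await search_recent_job_changes(
    job_titles=["VP of Sales", "Head of Marketing"],
    locations=["United States"],
    max_items_to_return=25,
    tool_config=tool_config,
)
# Returns [] when credentials are missing; paging stops on non-429 errors.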
 
@@ -585,18 +688,11 @@ async def find_matching_job_posting_proxy_curl(
  tool_config: Optional[List[Dict]] = None
  ) -> List[str]:
  """
- Find job postings on LinkedIn for a given company using Google Custom Search.
- Double check the same with Proxycurl API.
+ Find job postings on LinkedIn for a given company using Google Custom Search,
+ then optionally validate those links with Proxycurl.
 
- Args:
- company_name (str): The name of the company.
- keywords_check (List[str]): A list of keywords to include in the search.
- optional_keywords (List[str]): A list of optional keywords to include in the search.
- organization_linkedin_url (Optional[str]): The LinkedIn URL of the company.
- tool_config (Optional[List[Dict]]): Proxycurl tool configuration.
-
  Returns:
- List[str]: A list of job posting links.
+ List[str]: A list of matching job posting links.
  """
  logger.info("Entering find_matching_job_posting_proxy_curl")
 
@@ -622,11 +718,11 @@ async def find_matching_job_posting_proxy_curl(
  logger.debug(f"Google search query: {query}")
 
  # First Google search attempt
- results = await search_google(query.strip(), 1, tool_config=tool_config)
+ results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
  if not isinstance(results, list) or len(results) == 0:
  logger.info("No results found. Attempting fallback query without optional keywords.")
  query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str}'
- results = await search_google(query.strip(), 1, tool_config=tool_config)
+ results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
  if not isinstance(results, list) or len(results) == 0:
  logger.info("No job postings found in fallback search either.")
  return job_posting_links
@@ -655,18 +751,14 @@ async def find_matching_job_posting_proxy_curl(
 
  # Use Proxycurl to enrich job info
  logger.debug(f"Fetching job info from Proxycurl for link: {link}")
- try:
- json_result = await enrich_job_info_from_proxycurl(link, tool_config=tool_config)
- except Exception as e:
- logger.exception("Exception occurred while enriching job info from Proxycurl.")
- continue
-
- if not json_result:
- logger.debug("No job info returned; skipping.")
+ json_result = await enrich_job_info_from_proxycurl(link, tool_config=tool_config)
+ if not json_result or 'error' in json_result:
+ logger.debug("No valid job info returned; skipping.")
  continue
 
  text = json.dumps(json_result).lower()
 
+ # If the user gave an organization_linkedin_url, check if it matches
  company_match = False
  if organization_linkedin_url and json_result.get('company', {}):
  result_url = json_result.get('company', {}).get('url', '').lower()
@@ -685,43 +777,45 @@ async def find_matching_job_posting_proxy_curl(
  logger.info(f"Found {len(job_posting_links)} matching job postings.")
  return job_posting_links
 
+
  def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -> dict:
  """
  If input_user_properties has a non-empty value for a field, keep it.
  Otherwise, use that field from person_data.
  """
 
- # Helper function to determine if a property is considered "empty"
  def is_empty(value):
  # Checks for None, empty string, or string with only whitespace
  return value is None or (isinstance(value, str) and not value.strip())
 
- # Email
+ # Email - use first personal email if input is empty
  if is_empty(input_user_properties.get("email")):
- input_user_properties["email"] = person_data.get("email", "")
+ personal_emails = person_data.get("personal_emails")
+ if isinstance(personal_emails, list) and personal_emails:
+ input_user_properties["email"] = personal_emails[0]
 
  # Phone
  if is_empty(input_user_properties.get("phone")):
  input_user_properties["phone"] = person_data.get("contact", {}).get("sanitized_phone", "")
 
  # Full name
- if is_empty(input_user_properties.get("full_name")) and person_data.get("full_name"):
+ if person_data.get("full_name"):
  input_user_properties["full_name"] = person_data["full_name"]
 
  # First name
- if is_empty(input_user_properties.get("first_name")) and person_data.get("first_name"):
+ if person_data.get("first_name"):
  input_user_properties["first_name"] = person_data["first_name"]
 
  # Last name
- if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
+ if person_data.get("last_name"):
  input_user_properties["last_name"] = person_data["last_name"]
 
  # Occupation -> job_title
- if is_empty(input_user_properties.get("job_title")) and person_data.get("occupation"):
+ if person_data.get("occupation"):
  input_user_properties["job_title"] = person_data["occupation"]
 
  # Headline
- if is_empty(input_user_properties.get("headline")) and person_data.get("headline"):
+ if person_data.get("headline"):
  input_user_properties["headline"] = person_data["headline"]
 
  # Summary
@@ -732,11 +826,9 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
  experiences = person_data.get("experiences", [])
  if experiences:
  # Current role data
- # Organization Name
- if is_empty(input_user_properties.get("organization_name")):
- input_user_properties["organization_name"] = experiences[0].get("company", "")
+
+ input_user_properties["organization_name"] = experiences[0].get("company", "")
 
- # Organization Linkedin URL
  org_url = experiences[0].get("company_linkedin_profile_url", "")
  if org_url and is_empty(input_user_properties.get("organization_linkedin_url")):
  input_user_properties["organization_linkedin_url"] = org_url
@@ -752,27 +844,39 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
  if is_empty(input_user_properties.get("previous_organization_name")):
  input_user_properties["previous_organization_name"] = previous_org.get("company", "")
 
- # Combine city/state if available (and if lead_location is empty)
+ # Combine city/state if available (and if lead_location is empty); avoid literal "None"
  if is_empty(input_user_properties.get("lead_location")):
- if person_data.get("city") or person_data.get("state"):
- combined = f"{person_data.get('city', '')}, {person_data.get('state', '')}"
- input_user_properties["lead_location"] = combined.strip(", ")
+ city = person_data.get("city")
+ state = person_data.get("state")
+ parts = []
+ for value in (city, state):
+ if value is None:
+ continue
+ s = str(value).strip()
+ if not s or s.lower() == "none":
+ continue
+ parts.append(s)
+ if parts:
+ input_user_properties["lead_location"] = ", ".join(parts)
+
+ # LinkedIn Followers Count
+ if is_empty(input_user_properties.get("linkedin_follower_count")):
+ input_user_properties["linkedin_follower_count"] = person_data.get("follower_count", 0)
 
  return input_user_properties
 
 
+
  async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_config: Optional[List[Dict]] = None) -> dict:
  """
- Enriches the user info (input_user_properties) with data from Proxycurl using:
- 1. LinkedIn URL or email (if provided),
- 2. Otherwise by first name and last name, or full name.
-
- Args:
- input_user_properties (dict): Dictionary with user details (e.g. LinkedIn URL, email, names).
- tool_config (Optional[List[Dict]]): Proxycurl tool configuration.
+ Enriches the user info (input_user_properties) with data from Proxycurl.
+ If the user_linkedin_url is determined to be a proxy (acw* and length > 10),
+ we skip calling enrich_person_info_from_proxycurl, keep the input as-is,
+ and only perform the organization enrichment logic.
 
  Returns:
- dict: Updated input_user_properties with enriched data from Proxycurl.
+ dict: Updated input_user_properties with enriched data or
+ with an error field if something goes wrong.
  """
  logger.info("Entering enrich_user_info_with_proxy_curl")
 
@@ -784,23 +888,76 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  email = input_user_properties.get("email", "")
  user_data_from_proxycurl = None
 
- logger.debug(
- f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'"
- )
+ logger.debug(f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'")
 
- # If linkedin url or email is present, lookup
- if linkedin_url or email:
- try:
- user_data_from_proxycurl = await enrich_person_info_from_proxycurl(
- linkedin_url=linkedin_url,
- email=email,
+ # ---------------------------------------------------------------
+ # 1) Detect if the LinkedIn URL is a "proxy" URL (acw + length > 10)
+ # ---------------------------------------------------------------
+ def is_proxy_linkedin_url(url: str) -> bool:
+ """
+ Checks if the LinkedIn URL has an /in/<profile_id> path
+ that starts with 'acw' and has length > 10, indicating a proxy.
+ """
+ match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
+ if match:
+ profile_id = match.group(1)
+ if profile_id.startswith("acw") and len(profile_id) > 10:
+ return True
+ return False
+
+ if is_proxy_linkedin_url(linkedin_url):
+ logger.info("The LinkedIn URL appears to be a proxy URL. Skipping user data enrichment from Proxycurl.")
+ # We do NOT call enrich_person_info_from_proxycurl for user data.
+ # We just set linkedin_url_match = False and enrich organization info if possible:
+ input_user_properties["linkedin_url_match"] = False
+
+ # Attempt organization enrichment if we have an organization_linkedin_url:
+ company_data = {}
+ if input_user_properties.get("organization_linkedin_url"):
+ company_data = await enrich_organization_info_from_proxycurl(
+ organization_linkedin_url=input_user_properties["organization_linkedin_url"],
  tool_config=tool_config
  )
- if user_data_from_proxycurl and linkedin_url:
+ if company_data and not company_data.get("error"):
+ if company_data.get("organization_linkedin_url"):
+ input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+ if company_data.get("organization_name"):
+ input_user_properties["organization_name"] = company_data.get("organization_name", "")
+ input_user_properties["organization_size"] = str(
+ company_data.get("company_size_on_linkedin", "")
+ )
+ input_user_properties["company_size"] = str(
+ company_data.get("company_size_on_linkedin", "")
+ )
+ input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+ input_user_properties["industry"] = company_data.get("organization_industry", "")
+ input_user_properties["organization_revenue"] = ""
+
+ # Always clean & store any returned org info:
+ additional_props = input_user_properties.get("additional_properties") or {}
+ company_data = cleanup_properties(company_data)
+ additional_props["pc_company_data"] = json.dumps(company_data)
+ input_user_properties["additional_properties"] = additional_props
+
+ logger.info("Returning after skipping user enrichment for proxy URL.")
+ return input_user_properties
+
+ # ----------------------------------------------------------------
+ # 2) If not proxy, proceed with normal user enrichment logic
+ # ----------------------------------------------------------------
+ if linkedin_url or email:
+ user_data = await enrich_person_info_from_proxycurl(
+ linkedin_url=linkedin_url,
+ email=email,
+ tool_config=tool_config
+ )
+ if not user_data or 'error' in user_data:
+ logger.warning("No valid person data found by LinkedIn or email.")
+ else:
+ user_data_from_proxycurl = user_data
+ if linkedin_url:
  logger.info(f"User data found for LinkedIn URL: {linkedin_url}")
  input_user_properties["user_linkedin_url"] = linkedin_url
- except Exception as e:
- logger.exception("Exception occurred while enriching person info by LinkedIn or email.")
  else:
  # Otherwise, fallback to name-based lookup
  first_name = input_user_properties.get("first_name", "")
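
Quick check of the proxy-URL heuristic introduced above (profile ids are illustrative):

is_proxy_linkedin_url("https://www.linkedin.com/in/acwb7x9q2k4m1/")  # True: id starts with "acw", len > 10
is_proxy_linkedin_url("https://www.linkedin.com/in/jane-doe/")       # False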
@@ -811,7 +968,8 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  if full_name:
  name_parts = full_name.split(" ", 1)
  first_name = first_name or name_parts[0]
- last_name = last_name or (name_parts[1] if len(name_parts) > 1 else "")
+ if len(name_parts) > 1:
+ last_name = last_name or name_parts[1]
 
  if not full_name:
  full_name = f"{first_name} {last_name}".strip()
@@ -820,14 +978,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  logger.debug(f"Looking up person by name: {first_name} {last_name}, company: {company}")
 
  if first_name and last_name:
- try:
- search_result = await lookup_person_in_proxy_curl_by_name(
- first_name=first_name,
- last_name=last_name,
- company_name=company,
- tool_config=tool_config
- )
- results = search_result.get("results", [])
+ lookup_result = await lookup_person_in_proxy_curl_by_name(
+ first_name=first_name,
+ last_name=last_name,
+ company_name=company,
+ tool_config=tool_config
+ )
+ # Expecting a dict (search_result)
+ if lookup_result and not lookup_result.get('error'):
+ results = lookup_result.get("results", [])
  person_company = ""
  for person in results:
  linkedin_profile_url = person.get("linkedin_profile_url", "")
@@ -836,7 +995,7 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  linkedin_url=linkedin_profile_url,
  tool_config=tool_config
  )
- if data_from_proxycurl:
+ if data_from_proxycurl and not data_from_proxycurl.get('error'):
  person_name = data_from_proxycurl.get("name", "").lower()
  person_first_name = data_from_proxycurl.get("first_name", "").lower()
  person_last_name = data_from_proxycurl.get("last_name", "").lower()
@@ -846,7 +1005,7 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  if exp_company == company.lower():
  person_company = exp_company
  break
- # If there's a match for name/company, use the data
+
  if (
  (person_name == full_name.lower() or
  (person_first_name == first_name.lower() and person_last_name == last_name.lower()))
@@ -856,16 +1015,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  input_user_properties["user_linkedin_url"] = linkedin_profile_url
  user_data_from_proxycurl = data_from_proxycurl
  break
- except Exception as e:
- logger.exception("Exception occurred while looking up person by name.")
- pass
 
  if not user_data_from_proxycurl:
  logger.debug("No user data returned from Proxycurl.")
  input_user_properties["linkedin_url_match"] = False
  return input_user_properties
 
- # If user data was found, populate input_user_properties
+ # ------------------------------------------------------------------
+ # 3) If user data was found, sanitize & fill user properties
+ # ------------------------------------------------------------------
  url_pattern = re.compile(r'(https?://[^\s]+)', re.IGNORECASE)
 
  def sanitize_urls_in_data(data):
@@ -890,13 +1048,13 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
 
  person_data = sanitize_urls_in_data(user_data_from_proxycurl)
  additional_props = input_user_properties.get("additional_properties") or {}
-
+
  # Check if there's a match on first/last name
- first_matched = bool(
+ first_matched = (
  input_user_properties.get("first_name")
  and person_data.get("first_name") == input_user_properties["first_name"]
  )
- last_matched = bool(
+ last_matched = (
  input_user_properties.get("last_name")
  and person_data.get("last_name") == input_user_properties["last_name"]
  )
@@ -904,24 +1062,167 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
  if first_matched and last_matched:
  input_user_properties["linkedin_url_match"] = True
  input_user_properties["linkedin_validation_status"] = "valid"
-
 
  input_user_properties = fill_in_missing_properties(input_user_properties, person_data)
-
- company_data = await enrich_organization_info_from_proxycurl(
- organization_linkedin_url=input_user_properties.get("organization_linkedin_url"),
- tool_config=tool_config
- )
- company_data = {}
-
+
+ # ------------------------------------------------------------------
+ # 4) Attempt organization enrichment if we have an org LinkedIn URL
+ # ------------------------------------------------------------------
+ company_data = {}
+ if input_user_properties.get("organization_linkedin_url"):
+ company_data = await enrich_organization_info_from_proxycurl(
+ organization_linkedin_url=input_user_properties["organization_linkedin_url"],
+ tool_config=tool_config
+ )
+ if company_data and not company_data.get("error"):
+ if company_data.get("organization_linkedin_url"):
+ input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+ if company_data.get("organization_name"):
+ input_user_properties["organization_name"] = company_data.get("organization_name", "")
+ input_user_properties["organization_size"] = str(
+ company_data.get("company_size_on_linkedin", "")
+ )
+ input_user_properties["company_size"] = str(
+ company_data.get("company_size_on_linkedin", "")
+ )
+ input_user_properties["company_size_list"] = company_data.get("company_size", "")
+ input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+ input_user_properties["industry"] = company_data.get("organization_industry", "")
+ input_user_properties["organization_revenue"] = ""
+
  person_data = cleanup_properties(person_data)
-
  additional_props["pc_person_data"] = json.dumps(person_data)
-
+
  company_data = cleanup_properties(company_data)
  additional_props["pc_company_data"] = json.dumps(company_data)
  input_user_properties["additional_properties"] = additional_props
 
-
  logger.info("Enrichment of user info with Proxycurl complete.")
  return input_user_properties
+
+
+
+
+
+ @assistant_tool
+ async def find_leads_by_job_openings_proxy_curl(
+ query_params: Dict[str, Any],
+ hiring_manager_roles: List[str],
+ tool_config: Optional[List[Dict]] = None,
+ ) -> List[Dict]:
+ """Search LinkedIn job postings using Proxycurl and find hiring manager leads.
+
+ Args:
+ query_params: Dictionary of parameters to Proxycurl job search API. The
+ key ``job_title`` is required. Other keys like ``location`` may also
+ be supplied.
+ hiring_manager_roles: List of job titles to lookup at the company for
+ potential hiring managers.
+ tool_config: Optional configuration containing Proxycurl credentials.
+
+ Returns:
+ A list of lead dictionaries with normalized keys such as
+ ``first_name``, ``last_name``, ``user_linkedin_url``,
+ ``organization_name``, and ``organization_linkedin_url``.
+ """
+ logger.info("Entering find_leads_by_job_openings_proxy_curl")
+
+ if not isinstance(query_params, dict) or not query_params.get("job_title"):
+ logger.warning("query_params must include 'job_title'")
+ return []
+
+ try:
+ API_KEY = get_proxycurl_access_token(tool_config)
+ except ValueError as e:
+ logger.error(str(e))
+ return []
+
+ headers = {
+ "Authorization": f"Bearer {API_KEY}",
+ "Content-Type": "application/json",
+ }
+
+ job_search_url = "https://enrichlayer.com/api/v2/company/job"
+ leads: List[Dict] = []
+
+ # ------------------------------------------------------------------
+ # 1) Look up job openings
+ # ------------------------------------------------------------------
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.get(job_search_url, headers=headers, params=query_params) as resp:
+ if resp.status == 200:
+ job_result = await resp.json()
+ jobs = job_result.get("results") or job_result.get("jobs") or []
+ elif resp.status == 429:
+ logger.warning("Rate limit exceeded on job search")
+ await asyncio.sleep(30)
+ return []
+ else:
+ error_text = await resp.text()
+ logger.error("Job search error %s: %s", resp.status, error_text)
+ return []
+ except Exception:
+ logger.exception("Exception while searching jobs on Proxycurl")
+ return []
+
+ # ------------------------------------------------------------------
+ # 2) For each job, find leads for specified hiring manager roles
+ # ------------------------------------------------------------------
+ for job in jobs:
+ company = job.get("company", {}) if isinstance(job, dict) else {}
+ company_name = company.get("name", "")
+ company_url = company.get("url", "")
+ if not company_name:
+ continue
+
+ for role in hiring_manager_roles:
+ employee_params = {
+ "url": company_url,
+ "role_search": role,
+ "employment_status": "current",
+ "page_size": 1,
+ }
+ employees = []
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.get(
+ "https://enrichlayer.com/api/v2/company/employees",
+ headers=headers,
+ params=employee_params,
+ ) as e_resp:
+ if e_resp.status == 200:
+ data = await e_resp.json()
+ employees = data.get("employees") or data.get("profiles") or []
+ elif e_resp.status == 429:
+ logger.warning("Rate limit exceeded while fetching employees")
+ await asyncio.sleep(30)
+ continue
+ except Exception:
+ logger.exception("Exception while fetching employees from Proxycurl")
+ continue
+
+ for emp in employees:
+ profile_url = emp.get("linkedin_profile_url") or emp.get("profile_url")
+ if not profile_url:
+ continue
+ person = await enrich_person_info_from_proxycurl(
+ linkedin_url=profile_url, tool_config=tool_config
+ )
+ if not person or person.get("error"):
+ continue
+ lead = {
+ "first_name": person.get("first_name", ""),
+ "last_name": person.get("last_name", ""),
+ "full_name": person.get("full_name", ""),
+ "user_linkedin_url": profile_url,
+ "job_title": person.get("occupation", role),
+ "organization_name": company_name,
+ "organization_linkedin_url": company_url,
+ }
+ cleaned = cleanup_properties(lead)
+ if cleaned:
+ leads.append(cleaned)
+
+ logger.info("Returning %d leads from Proxycurl job search", len(leads))
+ return leads
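
A usage sketch for the new assistant tool (query values and roles are illustrative, tool_config as sketched earlier):

leads = await find_leads_by_job_openings_proxy_curl(
    query_params={"job_title": "Data Engineer", "location": "Seattle"},
    hiring_manager_roles=["Head of Data", "Engineering Manager"],
    tool_config=tool_config,
)
for lead in leads:
    print(lead.get("full_name"), lead.get("organization_name"))

Note that page_size is pinned to 1 per role, so each (job, role) pair contributes at most one employee profile.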