dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dhisana/schemas/common.py +10 -1
  2. dhisana/schemas/sales.py +203 -22
  3. dhisana/utils/add_mapping.py +0 -2
  4. dhisana/utils/apollo_tools.py +739 -119
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/check_email_validity_tools.py +35 -18
  7. dhisana/utils/check_for_intent_signal.py +1 -2
  8. dhisana/utils/check_linkedin_url_validity.py +34 -8
  9. dhisana/utils/clay_tools.py +3 -2
  10. dhisana/utils/clean_properties.py +1 -4
  11. dhisana/utils/compose_salesnav_query.py +0 -1
  12. dhisana/utils/compose_search_query.py +7 -3
  13. dhisana/utils/composite_tools.py +0 -1
  14. dhisana/utils/dataframe_tools.py +2 -2
  15. dhisana/utils/email_body_utils.py +72 -0
  16. dhisana/utils/email_provider.py +174 -35
  17. dhisana/utils/enrich_lead_information.py +183 -53
  18. dhisana/utils/fetch_openai_config.py +129 -0
  19. dhisana/utils/field_validators.py +1 -1
  20. dhisana/utils/g2_tools.py +0 -1
  21. dhisana/utils/generate_content.py +0 -1
  22. dhisana/utils/generate_email.py +68 -23
  23. dhisana/utils/generate_email_response.py +294 -46
  24. dhisana/utils/generate_flow.py +0 -1
  25. dhisana/utils/generate_linkedin_connect_message.py +9 -2
  26. dhisana/utils/generate_linkedin_response_message.py +137 -66
  27. dhisana/utils/generate_structured_output_internal.py +317 -164
  28. dhisana/utils/google_custom_search.py +150 -44
  29. dhisana/utils/google_oauth_tools.py +721 -0
  30. dhisana/utils/google_workspace_tools.py +278 -54
  31. dhisana/utils/hubspot_clearbit.py +3 -1
  32. dhisana/utils/hubspot_crm_tools.py +718 -272
  33. dhisana/utils/instantly_tools.py +3 -1
  34. dhisana/utils/lusha_tools.py +10 -7
  35. dhisana/utils/mailgun_tools.py +150 -0
  36. dhisana/utils/microsoft365_tools.py +447 -0
  37. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  38. dhisana/utils/openai_helpers.py +8 -6
  39. dhisana/utils/parse_linkedin_messages_txt.py +1 -3
  40. dhisana/utils/profile.py +37 -0
  41. dhisana/utils/proxy_curl_tools.py +377 -76
  42. dhisana/utils/proxycurl_search_leads.py +426 -0
  43. dhisana/utils/research_lead.py +3 -3
  44. dhisana/utils/sales_navigator_crawler.py +1 -6
  45. dhisana/utils/salesforce_crm_tools.py +323 -50
  46. dhisana/utils/search_router.py +131 -0
  47. dhisana/utils/search_router_jobs.py +51 -0
  48. dhisana/utils/sendgrid_tools.py +126 -91
  49. dhisana/utils/serarch_router_local_business.py +75 -0
  50. dhisana/utils/serpapi_additional_tools.py +290 -0
  51. dhisana/utils/serpapi_google_jobs.py +117 -0
  52. dhisana/utils/serpapi_google_search.py +188 -0
  53. dhisana/utils/serpapi_local_business_search.py +129 -0
  54. dhisana/utils/serpapi_search_tools.py +360 -432
  55. dhisana/utils/serperdev_google_jobs.py +125 -0
  56. dhisana/utils/serperdev_local_business.py +154 -0
  57. dhisana/utils/serperdev_search.py +233 -0
  58. dhisana/utils/smtp_email_tools.py +178 -18
  59. dhisana/utils/test_connect.py +1603 -130
  60. dhisana/utils/trasform_json.py +3 -3
  61. dhisana/utils/web_download_parse_tools.py +0 -1
  62. dhisana/utils/zoominfo_tools.py +2 -3
  63. dhisana/workflow/test.py +1 -1
  64. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
  65. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  66. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  67. dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
  68. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  69. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxy_curl_tools.py

@@ -5,15 +5,12 @@ import os
 import re
 import aiohttp
 import backoff
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

-from bs4 import BeautifulSoup
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.cache_output_tools import cache_output, retrieve_output
 from dhisana.utils.clean_properties import cleanup_properties
-from dhisana.utils.domain_parser import get_domain_from_website, is_excluded_domain
-from dhisana.utils.serpapi_search_tools import search_google
-from dhisana.utils.web_download_parse_tools import get_html_content_from_url
+from dhisana.utils.search_router import search_google_with_tools
 from urllib.parse import urlparse, urlunparse

 logging.basicConfig(level=logging.INFO)
@@ -23,9 +20,9 @@ logger = logging.getLogger(__name__)
 def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     """
     Retrieves the PROXY_CURL_API_KEY access token from the provided tool configuration.
-
-    Returns:
-        str: The PROXY_CURL_API_KEY if found; otherwise returns an empty string.
+
+    Raises:
+        ValueError: If the Proxycurl integration has not been configured.
     """
     PROXY_CURL_API_KEY = None

@@ -50,8 +47,10 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     PROXY_CURL_API_KEY = PROXY_CURL_API_KEY or os.getenv("PROXY_CURL_API_KEY")

     if not PROXY_CURL_API_KEY:
-        logger.error("PROXY_CURL_API_KEY not found in configuration or environment.")
-        return ""  # Return empty string if not found
+        logger.error("Proxycurl integration is not configured.")
+        raise ValueError(
+            "Proxycurl integration is not configured. Please configure the connection to Proxycurl in Integrations."
+        )

     return PROXY_CURL_API_KEY

@@ -78,23 +77,24 @@ async def enrich_person_info_from_proxycurl(
     """
     logger.info("Entering enrich_person_info_from_proxycurl")

-    API_KEY = get_proxycurl_access_token(tool_config)
-    if not API_KEY:
-        return {"error": "PROXY_CURL_API_KEY not found"}
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
     }

-    if not linkedin_url and not email and not phone:
-        logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
-        return {'error': "At least one of linkedin_url, email, or phone must be provided"}
+    if not linkedin_url:
+        logger.warning("No linkedin_url provided.")
+        return {'error': "linkedin_url must be provided"}

     # Check cache if linkedin_url is provided
     if linkedin_url:
         cached_response = retrieve_output("enrich_person_info_from_proxycurl", linkedin_url)
-        if cached_response is not None:
+        if cached_response is not None and cached_response.get('error') is None:
             logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
             return cached_response
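
One behavioral fix worth calling out: the cache guard above now treats a cached `{'error': ...}` payload as a miss, so transient failures are retried instead of being served forever. A minimal sketch of the intended contract, using hypothetical in-memory stand-ins for the real helpers in `dhisana.utils.cache_output_tools`:

```python
# Hypothetical stand-ins for cache_output / retrieve_output, used only to
# illustrate the "never serve a cached error" rule introduced above.
_cache: dict = {}

def cache_output(tool_name: str, key: str, value: dict) -> None:
    _cache[(tool_name, key)] = value

def retrieve_output(tool_name: str, key: str):
    return _cache.get((tool_name, key))

url = "https://www.linkedin.com/in/janedoe/"
cache_output("enrich_person_info_from_proxycurl", url, {"error": "Rate limit exceeded"})

cached = retrieve_output("enrich_person_info_from_proxycurl", url)
# Old guard: cached is not None                  -> hit, error served again
# New guard: ... and cached.get('error') is None -> miss, the API is retried
is_hit = cached is not None and cached.get("error") is None
print(is_hit)  # False
```

The companion change in the 404 handler points the same way: "Person not found" responses are no longer written to the cache at all.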
@@ -103,10 +103,13 @@ async def enrich_person_info_from_proxycurl(
         params['url'] = linkedin_url
     if email:
         params['email'] = email
+    else:
+        # Request Proxycurl to include personal emails when no email is provided
+        params['personal_email'] = 'include'
     if phone:
         params['phone'] = phone

-    url = 'https://nubela.co/proxycurl/api/v2/linkedin'
+    url = 'https://enrichlayer.com/api/v2/profile'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -122,8 +125,6 @@ async def enrich_person_info_from_proxycurl(
                 elif response.status == 404:
                     msg = "Person not found"
                     logger.warning(msg)
-                    if linkedin_url:
-                        cache_output("enrich_person_info_from_proxycurl", linkedin_url, {'error': msg})
                     return {'error': msg}
                 elif response.status == 429:
                     msg = "Rate limit exceeded"
@@ -166,9 +167,10 @@ async def lookup_person_in_proxy_curl_by_name(
         logger.warning("First name or last name missing for lookup.")
         return {'error': "Full name is required"}

-    API_KEY = get_proxycurl_access_token(tool_config)
-    if not API_KEY:
-        return {"error": "PROXY_CURL_API_KEY not found"}
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     headers = {'Authorization': f'Bearer {API_KEY}'}
     params = {
@@ -186,7 +188,7 @@ async def lookup_person_in_proxy_curl_by_name(
         logger.info(f"Cache hit for name lookup key: {key}")
         return cached_response

-    url = 'https://nubela.co/proxycurl/api/v2/search/person'
+    url = 'https://enrichlayer.com/api/v2/search/person'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -241,6 +243,13 @@ def transform_company_data(data: dict) -> dict:
         transformed["organization_website"] = data["website"]
     if "industry" in data:
         transformed["organization_industry"] = data["industry"]
+
+    if "company_size" in data:
+        transformed["company_size_list"] = data["company_size"]
+
+    if "company_size_on_linkedin" in data:
+        transformed["organization_size"] = data["company_size_on_linkedin"]
+        transformed["company_size"] = data["company_size_on_linkedin"]

     # Determine headquarters info from "hq" or "headquarters"
     hq_data = data.get("hq") or data.get("headquarters")
@@ -258,12 +267,52 @@ def transform_company_data(data: dict) -> dict:

     # Copy all other properties, excluding those already mapped
     for key, value in data.items():
-        if key not in ("name", "website", "industry", "hq", "headquarters"):
+        if key not in ("name", "website", "industry", "hq", "headquarters", "company_size"):
             transformed[key] = value

     return transformed


+def _build_company_profile_params(
+    company_url: str,
+    profile_flags: Dict[str, Optional[str]],
+) -> Dict[str, str]:
+    """
+    Build request params for the Enrichlayer company profile endpoint,
+    ensuring we only forward flags that were explicitly provided.
+    """
+    params: Dict[str, str] = {'url': company_url}
+    for key, value in profile_flags.items():
+        if value is not None:
+            params[key] = value
+    return params
+
+
+def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
+    """
+    Builds a cache key that is unique for the combination of identifier
+    (LinkedIn URL or domain) and the optional enrichment flags.
+    """
+    suffix_bits = [
+        f"{key}={value}"
+        for key, value in sorted(profile_flags.items())
+        if value is not None
+    ]
+    if suffix_bits:
+        return f"{identifier}|{'&'.join(suffix_bits)}"
+    return identifier
+
+
+def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
+    """
+    Convert a boolean flag into the string literals expected by Proxycurl.
+    True -> "include", False -> "exclude", None -> None (omit parameter).
+    """
+    if value is None:
+        return None
+    return "include" if value else "exclude"
+
+
 @backoff.on_exception(
     backoff.expo,
     aiohttp.ClientResponseError,
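
The three new helpers are pure functions, so their combined effect on a request is easy to pin down. A small sketch using the definitions above (the flag values are illustrative):

```python
from typing import Dict, Optional

def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
    # True -> "include", False -> "exclude", None -> omit the parameter.
    if value is None:
        return None
    return "include" if value else "exclude"

# Illustrative flag set: request funding data, skip categories entirely.
profile_flags: Dict[str, Optional[str]] = {
    "funding_data": _bool_to_include_exclude(True),   # "include"
    "categories": _bool_to_include_exclude(None),     # None -> dropped
    "use_cache": "if-present",
}

# _build_company_profile_params forwards only the flags that were set:
params = {"url": "https://www.linkedin.com/company/example/"}
params.update({k: v for k, v in profile_flags.items() if v is not None})
# -> {'url': ..., 'funding_data': 'include', 'use_cache': 'if-present'}

# _build_company_cache_key appends the sorted, non-None flags so that
# different flag combinations get distinct cache entries:
bits = [f"{k}={v}" for k, v in sorted(profile_flags.items()) if v is not None]
key = f"{params['url']}|{'&'.join(bits)}" if bits else params["url"]
print(key)
# https://www.linkedin.com/company/example/|funding_data=include&use_cache=if-present
```

The suffix is the point of the cache-key helper: it prevents a request made with `funding_data=True` from being answered by a cached response that was fetched without it.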
@@ -274,10 +323,27 @@ def transform_company_data(data: dict) -> dict:
 async def enrich_organization_info_from_proxycurl(
     organization_domain: Optional[str] = None,
     organization_linkedin_url: Optional[str] = None,
-    tool_config: Optional[List[Dict]] = None
+    tool_config: Optional[List[Dict]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict:
     """
     Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
+    Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.
+
+    Args:
+        organization_domain: Organization's domain name to resolve via Proxycurl.
+        organization_linkedin_url: LinkedIn company profile URL.
+        tool_config: Optional tool configuration metadata for credential lookup.
+        categories/funding_data/exit_data/acquisitions/extra: Set True to request
+            "include", False for "exclude", or None to omit.
+        use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
+        fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").

     Returns:
         dict: Transformed JSON response containing organization information,
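
A hedged usage sketch of the widened signature (parameter names are taken from the definition above; whether Enrichlayer actually returns the extra fields depends on the plan behind your API key):

```python
import asyncio

from dhisana.utils.proxy_curl_tools import enrich_organization_info_from_proxycurl

async def main() -> None:
    org = await enrich_organization_info_from_proxycurl(
        organization_linkedin_url="https://www.linkedin.com/company/example/",
        funding_data=True,       # forwarded as funding_data=include
        acquisitions=False,      # forwarded as acquisitions=exclude
        categories=None,         # omitted from the request entirely
        use_cache="if-present",
        fallback_to_cache="on-error",
        tool_config=None,        # falls back to the PROXY_CURL_API_KEY env var
    )
    print(org.get("organization_name"), org.get("organization_size"))

asyncio.run(main())
```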
@@ -285,9 +351,10 @@ async def enrich_organization_info_from_proxycurl(
     """
     logger.info("Entering enrich_organization_info_from_proxycurl")

-    API_KEY = get_proxycurl_access_token(tool_config)
-    if not API_KEY:
-        return {"error": "PROXY_CURL_API_KEY not found"}
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
@@ -298,6 +365,16 @@ async def enrich_organization_info_from_proxycurl(
         logger.warning("No organization domain or LinkedIn URL provided.")
         return {}

+    profile_flags: Dict[str, Optional[str]] = {
+        "categories": _bool_to_include_exclude(categories),
+        "funding_data": _bool_to_include_exclude(funding_data),
+        "exit_data": _bool_to_include_exclude(exit_data),
+        "acquisitions": _bool_to_include_exclude(acquisitions),
+        "extra": _bool_to_include_exclude(extra),
+        "use_cache": use_cache,
+        "fallback_to_cache": fallback_to_cache,
+    }
+
     # If LinkedIn URL is provided, standardize it and fetch data
     if organization_linkedin_url:
         logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
@@ -320,19 +397,17 @@ async def enrich_organization_info_from_proxycurl(
         if standardized_url and not standardized_url.endswith('/'):
             standardized_url += '/'

+        cache_key = _build_company_cache_key(standardized_url, profile_flags)
         # Check cache for standardized LinkedIn URL
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", standardized_url)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
+            cached_response = transform_company_data(cached_response)
             return cached_response

         # Fetch details using standardized LinkedIn URL
-        url = 'https://nubela.co/proxycurl/api/linkedin/company'
-        params = {
-            'url': standardized_url,
-            'use_cache': 'if-present',
-            'fallback_to_cache': 'on-error',
-        }
+        url = 'https://enrichlayer.com/api/v2/company'
+        params = _build_company_profile_params(standardized_url, profile_flags)
         logger.debug(f"Making request to Proxycurl with params: {params}")

         async with aiohttp.ClientSession() as session:
@@ -342,7 +417,7 @@ async def enrich_organization_info_from_proxycurl(
                     if response.status == 200:
                         result = await response.json()
                         transformed_result = transform_company_data(result)
-                        cache_output("enrich_organization_info_from_proxycurl", standardized_url, transformed_result)
+                        cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
                         logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                         return transformed_result
                     elif response.status == 429:
@@ -350,9 +425,20 @@ async def enrich_organization_info_from_proxycurl(
                         logger.warning(msg)
                         await asyncio.sleep(30)
                         return {"error": msg}
+                    elif response.status == 404:
+                        error_text = await response.text()
+                        logger.warning(
+                            f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
+                        )
+                        cache_output(
+                            "enrich_organization_info_from_proxycurl", cache_key, {}
+                        )
+                        return {}
                     else:
                         error_text = await response.text()
-                        logger.error(f"Error from Proxycurl organization info fetch by URL: {error_text}")
+                        logger.error(
+                            f"Error from Proxycurl organization info fetch by URL: {error_text}"
+                        )
                         return {}
             except Exception as e:
                 logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
@@ -361,12 +447,13 @@ async def enrich_organization_info_from_proxycurl(
     # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
     if organization_domain:
         logger.debug(f"Organization domain provided: {organization_domain}")
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", organization_domain)
+        domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization domain: {organization_domain}")
             return cached_response

-        resolve_url = 'https://nubela.co/proxycurl/api/linkedin/company/resolve'
+        resolve_url = 'https://enrichlayer.com/api/v2/company/resolve'
         params = {'domain': organization_domain}
         logger.debug(f"Making request to Proxycurl to resolve domain with params: {params}")

@@ -390,14 +477,15 @@ async def enrich_organization_info_from_proxycurl(
                         else:
                             standardized_url = company_url

-                        profile_url = 'https://nubela.co/proxycurl/api/v2/linkedin/company'
+                        profile_url = 'https://enrichlayer.com/api/v2/company'
                         try:
-                            async with session.get(profile_url, headers=HEADERS, params={'url': standardized_url}) as profile_response:
+                            profile_params = _build_company_profile_params(standardized_url, profile_flags)
+                            async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
                                 logger.debug(f"Received profile response status: {profile_response.status}")
                                 if profile_response.status == 200:
                                     result = await profile_response.json()
                                     transformed_result = transform_company_data(result)
-                                    cache_output("enrich_organization_info_from_proxycurl", organization_domain, transformed_result)
+                                    cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
                                     logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                                     return transformed_result
                                 elif profile_response.status == 429:
@@ -423,7 +511,7 @@ async def enrich_organization_info_from_proxycurl(
                     elif response.status == 404:
                         msg = "Item not found"
                         logger.warning(msg)
-                        cache_output("enrich_organization_info_from_proxycurl", organization_domain, {})
+                        cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
                         return {}
                     else:
                         error_text = await response.text()
@@ -456,9 +544,10 @@ async def enrich_job_info_from_proxycurl(
     """
     logger.info("Entering enrich_job_info_from_proxycurl")

-    API_KEY = get_proxycurl_access_token(tool_config)
-    if not API_KEY:
-        return {"error": "PROXY_CURL_API_KEY not found"}
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
@@ -476,7 +565,7 @@ async def enrich_job_info_from_proxycurl(
             return cached_response

     params = {'url': job_url}
-    api_endpoint = 'https://nubela.co/proxycurl/api/linkedin/job'
+    api_endpoint = 'https://enrichlayer.com/api/v2/job'
     logger.debug(f"Making request to Proxycurl for job info with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -529,9 +618,10 @@ async def search_recent_job_changes(
     """
     logger.info("Entering search_recent_job_changes")

-    API_KEY = get_proxycurl_access_token(tool_config)
-    if not API_KEY:
-        logger.error("No API key found; returning empty results.")
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
         return []

     HEADERS = {
@@ -539,7 +629,7 @@ async def search_recent_job_changes(
         'Content-Type': 'application/json'
     }

-    url = 'https://nubela.co/proxycurl/api/search/person'
+    url = 'https://enrichlayer.com/api/v2/search/person'
     results = []
     page = 1
     per_page = min(max_items_to_return, 100)
@@ -582,7 +672,7 @@ async def search_recent_job_changes(
                     error_text = await response.text()
                     logger.error(f"Error while searching recent job changes: {error_text}")
                     break
-        except Exception as e:
+        except Exception:
            logger.exception("Exception occurred while searching recent job changes.")
            break

@@ -628,11 +718,11 @@ async def find_matching_job_posting_proxy_curl(
     logger.debug(f"Google search query: {query}")

     # First Google search attempt
-    results = await search_google(query.strip(), 1, tool_config=tool_config)
+    results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
     if not isinstance(results, list) or len(results) == 0:
         logger.info("No results found. Attempting fallback query without optional keywords.")
         query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str}'
-        results = await search_google(query.strip(), 1, tool_config=tool_config)
+        results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
         if not isinstance(results, list) or len(results) == 0:
             logger.info("No job postings found in fallback search either.")
             return job_posting_links
@@ -698,32 +788,34 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
         # Checks for None, empty string, or string with only whitespace
         return value is None or (isinstance(value, str) and not value.strip())

-    # Email
+    # Email - use first personal email if input is empty
     if is_empty(input_user_properties.get("email")):
-        input_user_properties["email"] = person_data.get("email", "")
+        personal_emails = person_data.get("personal_emails")
+        if isinstance(personal_emails, list) and personal_emails:
+            input_user_properties["email"] = personal_emails[0]

     # Phone
     if is_empty(input_user_properties.get("phone")):
         input_user_properties["phone"] = person_data.get("contact", {}).get("sanitized_phone", "")

     # Full name
-    if is_empty(input_user_properties.get("full_name")) and person_data.get("full_name"):
+    if person_data.get("full_name"):
         input_user_properties["full_name"] = person_data["full_name"]

     # First name
-    if is_empty(input_user_properties.get("first_name")) and person_data.get("first_name"):
+    if person_data.get("first_name"):
         input_user_properties["first_name"] = person_data["first_name"]

     # Last name
-    if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
+    if person_data.get("last_name"):
         input_user_properties["last_name"] = person_data["last_name"]

     # Occupation -> job_title
-    if is_empty(input_user_properties.get("job_title")) and person_data.get("occupation"):
+    if person_data.get("occupation"):
         input_user_properties["job_title"] = person_data["occupation"]

     # Headline
-    if is_empty(input_user_properties.get("headline")) and person_data.get("headline"):
+    if person_data.get("headline"):
         input_user_properties["headline"] = person_data["headline"]

     # Summary
@@ -734,8 +826,8 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
     experiences = person_data.get("experiences", [])
     if experiences:
         # Current role data
-        if is_empty(input_user_properties.get("organization_name")):
-            input_user_properties["organization_name"] = experiences[0].get("company", "")
+
+        input_user_properties["organization_name"] = experiences[0].get("company", "")

         org_url = experiences[0].get("company_linkedin_profile_url", "")
         if org_url and is_empty(input_user_properties.get("organization_linkedin_url")):
@@ -752,22 +844,39 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
         if is_empty(input_user_properties.get("previous_organization_name")):
             input_user_properties["previous_organization_name"] = previous_org.get("company", "")

-    # Combine city/state if available (and if lead_location is empty)
+    # Combine city/state if available (and if lead_location is empty); avoid literal "None"
     if is_empty(input_user_properties.get("lead_location")):
-        if person_data.get("city") or person_data.get("state"):
-            combined = f"{person_data.get('city', '')}, {person_data.get('state', '')}"
-            input_user_properties["lead_location"] = combined.strip(", ")
+        city = person_data.get("city")
+        state = person_data.get("state")
+        parts = []
+        for value in (city, state):
+            if value is None:
+                continue
+            s = str(value).strip()
+            if not s or s.lower() == "none":
+                continue
+            parts.append(s)
+        if parts:
+            input_user_properties["lead_location"] = ", ".join(parts)
+
+    # LinkedIn Followers Count
+    if is_empty(input_user_properties.get("linkedin_follower_count")):
+        input_user_properties["linkedin_follower_count"] = person_data.get("follower_count", 0)

     return input_user_properties


+
 async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_config: Optional[List[Dict]] = None) -> dict:
     """
     Enriches the user info (input_user_properties) with data from Proxycurl.
+    If the user_linkedin_url is determined to be a proxy (acw* and length > 10),
+    we skip calling enrich_person_info_from_proxycurl, keep the input as-is,
+    and only perform the organization enrichment logic.

     Returns:
-        dict: Updated input_user_properties with enriched data from Proxycurl
-              or with an error field if something goes wrong.
+        dict: Updated input_user_properties with enriched data or
+              with an error field if something goes wrong.
     """
     logger.info("Entering enrich_user_info_with_proxy_curl")
@@ -781,7 +890,61 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

     logger.debug(f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'")

-    # If linkedin url or email is present, lookup
+    # ---------------------------------------------------------------
+    # 1) Detect if the LinkedIn URL is a "proxy" URL (acw + length > 10)
+    # ---------------------------------------------------------------
+    def is_proxy_linkedin_url(url: str) -> bool:
+        """
+        Checks if the LinkedIn URL has an /in/<profile_id> path
+        that starts with 'acw' and has length > 10, indicating a proxy.
+        """
+        match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
+        if match:
+            profile_id = match.group(1)
+            if profile_id.startswith("acw") and len(profile_id) > 10:
+                return True
+        return False
+
+    if is_proxy_linkedin_url(linkedin_url):
+        logger.info("The LinkedIn URL appears to be a proxy URL. Skipping user data enrichment from Proxycurl.")
+        # We do NOT call enrich_person_info_from_proxycurl for user data.
+        # We just set linkedin_url_match = False and enrich organization info if possible:
+        input_user_properties["linkedin_url_match"] = False
+
+        # Attempt organization enrichment if we have an organization_linkedin_url:
+        company_data = {}
+        if input_user_properties.get("organization_linkedin_url"):
+            company_data = await enrich_organization_info_from_proxycurl(
+                organization_linkedin_url=input_user_properties["organization_linkedin_url"],
+                tool_config=tool_config
+            )
+            if company_data and not company_data.get("error"):
+                if company_data.get("organization_linkedin_url"):
+                    input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+                if company_data.get("organization_name"):
+                    input_user_properties["organization_name"] = company_data.get("organization_name", "")
+                input_user_properties["organization_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["company_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+                input_user_properties["industry"] = company_data.get("organization_industry", "")
+                input_user_properties["organization_revenue"] = ""
+
+        # Always clean & store any returned org info:
+        additional_props = input_user_properties.get("additional_properties") or {}
+        company_data = cleanup_properties(company_data)
+        additional_props["pc_company_data"] = json.dumps(company_data)
+        input_user_properties["additional_properties"] = additional_props
+
+        logger.info("Returning after skipping user enrichment for proxy URL.")
+        return input_user_properties
+
+    # ----------------------------------------------------------------
+    # 2) If not proxy, proceed with normal user enrichment logic
+    # ----------------------------------------------------------------
     if linkedin_url or email:
         user_data = await enrich_person_info_from_proxycurl(
             linkedin_url=linkedin_url,
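
The proxy detection above keys off anonymized profile slugs. One subtlety worth noting: the URL regex matches case-insensitively, but `startswith("acw")` does not, so only lowercase `acw...` slugs are treated as proxies. A quick demonstration:

```python
import re

def is_proxy_linkedin_url(url: str) -> bool:
    # Same logic as the hunk above: an /in/<profile_id> slug that starts
    # with "acw" and is longer than 10 characters is treated as a proxy.
    match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
    if match:
        profile_id = match.group(1)
        if profile_id.startswith("acw") and len(profile_id) > 10:
            return True
    return False

assert is_proxy_linkedin_url("https://www.linkedin.com/in/acwaaabb12345/")
assert not is_proxy_linkedin_url("https://www.linkedin.com/in/janedoe/")
assert not is_proxy_linkedin_url("https://www.linkedin.com/in/ACwAAABB12345/")  # uppercase slug is not flagged
```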
@@ -858,7 +1021,9 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
             input_user_properties["linkedin_url_match"] = False
             return input_user_properties

-    # If user data was found, populate input_user_properties
+    # ------------------------------------------------------------------
+    # 3) If user data was found, sanitize & fill user properties
+    # ------------------------------------------------------------------
     url_pattern = re.compile(r'(https?://[^\s]+)', re.IGNORECASE)

     def sanitize_urls_in_data(data):
@@ -900,11 +1065,13 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

     input_user_properties = fill_in_missing_properties(input_user_properties, person_data)

-    # Attempt organization enrichment if we have an organization_linkedin_url
+    # ------------------------------------------------------------------
+    # 4) Attempt organization enrichment if we have an org LinkedIn URL
+    # ------------------------------------------------------------------
     company_data = {}
     if input_user_properties.get("organization_linkedin_url"):
         company_data = await enrich_organization_info_from_proxycurl(
-            organization_linkedin_url=input_user_properties.get("organization_linkedin_url"),
+            organization_linkedin_url=input_user_properties["organization_linkedin_url"],
             tool_config=tool_config
         )
     if company_data and not company_data.get("error"):
@@ -912,8 +1079,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
             input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
         if company_data.get("organization_name"):
             input_user_properties["organization_name"] = company_data.get("organization_name", "")
-        input_user_properties["organization_size"] = company_data.get("company_size_on_linkedin", "")
-        input_user_properties["organization_industry"] = company_data.get("industry", "")
+        input_user_properties["organization_size"] = str(
+            company_data.get("company_size_on_linkedin", "")
+        )
+        input_user_properties["company_size"] = str(
+            company_data.get("company_size_on_linkedin", "")
+        )
+        input_user_properties["company_size_list"] = company_data.get("company_size", "")
+        input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+        input_user_properties["industry"] = company_data.get("organization_industry", "")
         input_user_properties["organization_revenue"] = ""

         person_data = cleanup_properties(person_data)
@@ -925,3 +1099,130 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

     logger.info("Enrichment of user info with Proxycurl complete.")
     return input_user_properties
+
+
+
+
+
+@assistant_tool
+async def find_leads_by_job_openings_proxy_curl(
+    query_params: Dict[str, Any],
+    hiring_manager_roles: List[str],
+    tool_config: Optional[List[Dict]] = None,
+) -> List[Dict]:
+    """Search LinkedIn job postings using Proxycurl and find hiring manager leads.
+
+    Args:
+        query_params: Dictionary of parameters to Proxycurl job search API. The
+            key ``job_title`` is required. Other keys like ``location`` may also
+            be supplied.
+        hiring_manager_roles: List of job titles to lookup at the company for
+            potential hiring managers.
+        tool_config: Optional configuration containing Proxycurl credentials.
+
+    Returns:
+        A list of lead dictionaries with normalized keys such as
+        ``first_name``, ``last_name``, ``user_linkedin_url``,
+        ``organization_name``, and ``organization_linkedin_url``.
+    """
+    logger.info("Entering find_leads_by_job_openings_proxy_curl")
+
+    if not isinstance(query_params, dict) or not query_params.get("job_title"):
+        logger.warning("query_params must include 'job_title'")
+        return []
+
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    job_search_url = "https://enrichlayer.com/api/v2/company/job"
+    leads: List[Dict] = []
+
+    # ------------------------------------------------------------------
+    # 1) Look up job openings
+    # ------------------------------------------------------------------
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(job_search_url, headers=headers, params=query_params) as resp:
+                if resp.status == 200:
+                    job_result = await resp.json()
+                    jobs = job_result.get("results") or job_result.get("jobs") or []
+                elif resp.status == 429:
+                    logger.warning("Rate limit exceeded on job search")
+                    await asyncio.sleep(30)
+                    return []
+                else:
+                    error_text = await resp.text()
+                    logger.error("Job search error %s: %s", resp.status, error_text)
+                    return []
+    except Exception:
+        logger.exception("Exception while searching jobs on Proxycurl")
+        return []
+
+    # ------------------------------------------------------------------
+    # 2) For each job, find leads for specified hiring manager roles
+    # ------------------------------------------------------------------
+    for job in jobs:
+        company = job.get("company", {}) if isinstance(job, dict) else {}
+        company_name = company.get("name", "")
+        company_url = company.get("url", "")
+        if not company_name:
+            continue
+
+        for role in hiring_manager_roles:
+            employee_params = {
+                "url": company_url,
+                "role_search": role,
+                "employment_status": "current",
+                "page_size": 1,
+            }
+            employees = []
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(
+                        "https://enrichlayer.com/api/v2/company/employees",
+                        headers=headers,
+                        params=employee_params,
+                    ) as e_resp:
+                        if e_resp.status == 200:
+                            data = await e_resp.json()
+                            employees = data.get("employees") or data.get("profiles") or []
+                        elif e_resp.status == 429:
+                            logger.warning("Rate limit exceeded while fetching employees")
+                            await asyncio.sleep(30)
+                            continue
+            except Exception:
+                logger.exception("Exception while fetching employees from Proxycurl")
+                continue
+
+            for emp in employees:
+                profile_url = emp.get("linkedin_profile_url") or emp.get("profile_url")
+                if not profile_url:
+                    continue
+                person = await enrich_person_info_from_proxycurl(
+                    linkedin_url=profile_url, tool_config=tool_config
+                )
+                if not person or person.get("error"):
+                    continue
+                lead = {
+                    "first_name": person.get("first_name", ""),
+                    "last_name": person.get("last_name", ""),
+                    "full_name": person.get("full_name", ""),
+                    "user_linkedin_url": profile_url,
+                    "job_title": person.get("occupation", role),
+                    "organization_name": company_name,
+                    "organization_linkedin_url": company_url,
+                }
+                cleaned = cleanup_properties(lead)
+                if cleaned:
+                    leads.append(cleaned)
+
+    logger.info("Returning %d leads from Proxycurl job search", len(leads))
+    return leads
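
An end-to-end sketch of the new tool (a hedged example: `job_title` and `location` follow the docstring above, and the endpoints involved typically consume Proxycurl credits per call):

```python
import asyncio

from dhisana.utils.proxy_curl_tools import find_leads_by_job_openings_proxy_curl

async def main() -> None:
    leads = await find_leads_by_job_openings_proxy_curl(
        query_params={"job_title": "Data Engineer", "location": "United States"},
        hiring_manager_roles=["VP Engineering", "Head of Data"],
        tool_config=None,  # PROXY_CURL_API_KEY is read from the environment
    )
    for lead in leads:
        # cleanup_properties may drop empty fields, so use .get() here
        print(lead.get("full_name"), "-", lead.get("organization_name"))

asyncio.run(main())
```

Note the design choice visible in the implementation: at most one employee is enriched per role per job (`page_size: 1`), which keeps credit usage bounded when the job search returns many postings.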