dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +10 -1
- dhisana/schemas/sales.py +203 -22
- dhisana/utils/add_mapping.py +0 -2
- dhisana/utils/apollo_tools.py +739 -119
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/check_email_validity_tools.py +35 -18
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +1 -4
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +174 -35
- dhisana/utils/enrich_lead_information.py +183 -53
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +68 -23
- dhisana/utils/generate_email_response.py +294 -46
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +9 -2
- dhisana/utils/generate_linkedin_response_message.py +137 -66
- dhisana/utils/generate_structured_output_internal.py +317 -164
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +278 -54
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +718 -272
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +8 -6
- dhisana/utils/parse_linkedin_messages_txt.py +1 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +377 -76
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +3 -3
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +360 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +178 -18
- dhisana/utils/test_connect.py +1603 -130
- dhisana/utils/trasform_json.py +3 -3
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxy_curl_tools.py

@@ -5,15 +5,12 @@ import os
 import re
 import aiohttp
 import backoff
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

-from bs4 import BeautifulSoup
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.cache_output_tools import cache_output, retrieve_output
 from dhisana.utils.clean_properties import cleanup_properties
-from dhisana.utils.
-from dhisana.utils.serpapi_search_tools import search_google
-from dhisana.utils.web_download_parse_tools import get_html_content_from_url
+from dhisana.utils.search_router import search_google_with_tools
 from urllib.parse import urlparse, urlunparse

 logging.basicConfig(level=logging.INFO)
@@ -23,9 +20,9 @@ logger = logging.getLogger(__name__)
 def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     """
     Retrieves the PROXY_CURL_API_KEY access token from the provided tool configuration.
-
-
-
+
+    Raises:
+        ValueError: If the Proxycurl integration has not been configured.
     """
     PROXY_CURL_API_KEY = None

@@ -50,8 +47,10 @@ def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
     PROXY_CURL_API_KEY = PROXY_CURL_API_KEY or os.getenv("PROXY_CURL_API_KEY")

     if not PROXY_CURL_API_KEY:
-        logger.error("
-
+        logger.error("Proxycurl integration is not configured.")
+        raise ValueError(
+            "Proxycurl integration is not configured. Please configure the connection to Proxycurl in Integrations."
+        )

     return PROXY_CURL_API_KEY

@@ -78,23 +77,24 @@ async def enrich_person_info_from_proxycurl(
     """
     logger.info("Entering enrich_person_info_from_proxycurl")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
         'Content-Type': 'application/json'
     }

-    if not linkedin_url
-        logger.warning("No linkedin_url
-        return {'error': "
+    if not linkedin_url:
+        logger.warning("No linkedin_url provided.")
+        return {'error': "linkedin_url must be provided"}

     # Check cache if linkedin_url is provided
     if linkedin_url:
         cached_response = retrieve_output("enrich_person_info_from_proxycurl", linkedin_url)
-        if cached_response is not None:
+        if cached_response is not None and cached_response.get('error') is None:
             logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
             return cached_response

@@ -103,10 +103,13 @@
         params['url'] = linkedin_url
     if email:
         params['email'] = email
+    else:
+        # Request Proxycurl to include personal emails when no email is provided
+        params['personal_email'] = 'include'
     if phone:
         params['phone'] = phone

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/profile'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -122,8 +125,6 @@
                 elif response.status == 404:
                     msg = "Person not found"
                     logger.warning(msg)
-                    if linkedin_url:
-                        cache_output("enrich_person_info_from_proxycurl", linkedin_url, {'error': msg})
                     return {'error': msg}
                 elif response.status == 429:
                     msg = "Rate limit exceeded"
@@ -166,9 +167,10 @@ async def lookup_person_in_proxy_curl_by_name(
         logger.warning("First name or last name missing for lookup.")
         return {'error': "Full name is required"}

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     headers = {'Authorization': f'Bearer {API_KEY}'}
     params = {
@@ -186,7 +188,7 @@
         logger.info(f"Cache hit for name lookup key: {key}")
         return cached_response

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/search/person'
     logger.debug(f"Making request to Proxycurl with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -241,6 +243,13 @@ def transform_company_data(data: dict) -> dict:
         transformed["organization_website"] = data["website"]
     if "industry" in data:
         transformed["organization_industry"] = data["industry"]
+
+    if "company_size" in data:
+        transformed["company_size_list"] = data["company_size"]
+
+    if "company_size_on_linkedin" in data:
+        transformed["organization_size"] = data["company_size_on_linkedin"]
+        transformed["company_size"] = data["company_size_on_linkedin"]

     # Determine headquarters info from "hq" or "headquarters"
     hq_data = data.get("hq") or data.get("headquarters")
@@ -258,12 +267,52 @@ def transform_company_data(data: dict) -> dict:

     # Copy all other properties, excluding those already mapped
     for key, value in data.items():
-        if key not in ("name", "website", "industry", "hq", "headquarters"):
+        if key not in ("name", "website", "industry", "hq", "headquarters", "company_size"):
             transformed[key] = value

     return transformed


+def _build_company_profile_params(
+    company_url: str,
+    profile_flags: Dict[str, Optional[str]],
+) -> Dict[str, str]:
+    """
+    Build request params for the Enrichlayer company profile endpoint,
+    ensuring we only forward flags that were explicitly provided.
+    """
+    params: Dict[str, str] = {'url': company_url}
+    for key, value in profile_flags.items():
+        if value is not None:
+            params[key] = value
+    return params
+
+
+def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
+    """
+    Builds a cache key that is unique for the combination of identifier
+    (LinkedIn URL or domain) and the optional enrichment flags.
+    """
+    suffix_bits = [
+        f"{key}={value}"
+        for key, value in sorted(profile_flags.items())
+        if value is not None
+    ]
+    if suffix_bits:
+        return f"{identifier}|{'&'.join(suffix_bits)}"
+    return identifier
+
+
+def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
+    """
+    Convert a boolean flag into the string literals expected by Proxycurl.
+    True -> "include", False -> "exclude", None -> None (omit parameter).
+    """
+    if value is None:
+        return None
+    return "include" if value else "exclude"
+
+
 @backoff.on_exception(
     backoff.expo,
     aiohttp.ClientResponseError,
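Note: the three helpers above are pure functions, so their composition is easy to check in isolation. A minimal sketch (the company URL below is made up for illustration):

    # Illustrative values only, using the helpers added in this hunk.
    flags = {
        "funding_data": _bool_to_include_exclude(True),   # -> "include"
        "extra": _bool_to_include_exclude(False),         # -> "exclude"
        "categories": _bool_to_include_exclude(None),     # -> None, dropped from params
        "use_cache": "if-present",
    }
    params = _build_company_profile_params("https://www.linkedin.com/company/example-co/", flags)
    # {'url': 'https://www.linkedin.com/company/example-co/', 'funding_data': 'include',
    #  'extra': 'exclude', 'use_cache': 'if-present'}
    cache_key = _build_company_cache_key("https://www.linkedin.com/company/example-co/", flags)
    # 'https://www.linkedin.com/company/example-co/|extra=exclude&funding_data=include&use_cache=if-present'

Because the flags are folded into the cache key in sorted order, the same identifier requested with different enrichment flags no longer collides in the cache.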
@@ -274,10 +323,27 @@ def transform_company_data(data: dict) -> dict:
 async def enrich_organization_info_from_proxycurl(
     organization_domain: Optional[str] = None,
     organization_linkedin_url: Optional[str] = None,
-    tool_config: Optional[List[Dict]] = None
+    tool_config: Optional[List[Dict]] = None,
+    categories: Optional[bool] = None,
+    funding_data: Optional[bool] = None,
+    exit_data: Optional[bool] = None,
+    acquisitions: Optional[bool] = None,
+    extra: Optional[bool] = None,
+    use_cache: Optional[str] = "if-present",
+    fallback_to_cache: Optional[str] = "on-error",
 ) -> Dict:
     """
     Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
+    Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.
+
+    Args:
+        organization_domain: Organization's domain name to resolve via Proxycurl.
+        organization_linkedin_url: LinkedIn company profile URL.
+        tool_config: Optional tool configuration metadata for credential lookup.
+        categories/funding_data/exit_data/acquisitions/extra: Set True to request
+            "include", False for "exclude", or None to omit.
+        use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
+        fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").

     Returns:
         dict: Transformed JSON response containing organization information,
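Note: a hedged usage sketch of the widened signature (the URL and flag choices are illustrative; the boolean-to-"include"/"exclude" translation happens inside the function via _bool_to_include_exclude):

    org = await enrich_organization_info_from_proxycurl(
        organization_linkedin_url="https://www.linkedin.com/company/example-co/",  # made-up URL
        funding_data=True,    # sent as funding_data=include
        exit_data=False,      # sent as exit_data=exclude
        acquisitions=None,    # omitted from the request entirely
        tool_config=None,     # falls back to the PROXY_CURL_API_KEY env var
    )

Callers that pass no flags keep the old behavior, since use_cache and fallback_to_cache default to the previously hard-coded "if-present" / "on-error" values removed further down.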
@@ -285,9 +351,10 @@ async def enrich_organization_info_from_proxycurl(
     """
     logger.info("Entering enrich_organization_info_from_proxycurl")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
@@ -298,6 +365,16 @@
         logger.warning("No organization domain or LinkedIn URL provided.")
         return {}

+    profile_flags: Dict[str, Optional[str]] = {
+        "categories": _bool_to_include_exclude(categories),
+        "funding_data": _bool_to_include_exclude(funding_data),
+        "exit_data": _bool_to_include_exclude(exit_data),
+        "acquisitions": _bool_to_include_exclude(acquisitions),
+        "extra": _bool_to_include_exclude(extra),
+        "use_cache": use_cache,
+        "fallback_to_cache": fallback_to_cache,
+    }
+
     # If LinkedIn URL is provided, standardize it and fetch data
     if organization_linkedin_url:
         logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
@@ -320,19 +397,17 @@
         if standardized_url and not standardized_url.endswith('/'):
             standardized_url += '/'

+        cache_key = _build_company_cache_key(standardized_url, profile_flags)
         # Check cache for standardized LinkedIn URL
-        cached_response = retrieve_output("enrich_organization_info_from_proxycurl",
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
+            cached_response = transform_company_data(cached_response)
             return cached_response

         # Fetch details using standardized LinkedIn URL
-        url = 'https://
-        params =
-            'url': standardized_url,
-            'use_cache': 'if-present',
-            'fallback_to_cache': 'on-error',
-        }
+        url = 'https://enrichlayer.com/api/v2/company'
+        params = _build_company_profile_params(standardized_url, profile_flags)
         logger.debug(f"Making request to Proxycurl with params: {params}")

         async with aiohttp.ClientSession() as session:
@@ -342,7 +417,7 @@
                     if response.status == 200:
                         result = await response.json()
                         transformed_result = transform_company_data(result)
-                        cache_output("enrich_organization_info_from_proxycurl",
+                        cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
                         logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                         return transformed_result
                     elif response.status == 429:
@@ -350,9 +425,20 @@
                         logger.warning(msg)
                         await asyncio.sleep(30)
                         return {"error": msg}
+                    elif response.status == 404:
+                        error_text = await response.text()
+                        logger.warning(
+                            f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
+                        )
+                        cache_output(
+                            "enrich_organization_info_from_proxycurl", cache_key, {}
+                        )
+                        return {}
                     else:
                         error_text = await response.text()
-                        logger.error(
+                        logger.error(
+                            f"Error from Proxycurl organization info fetch by URL: {error_text}"
+                        )
                         return {}
         except Exception as e:
             logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
@@ -361,12 +447,13 @@
     # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
     if organization_domain:
         logger.debug(f"Organization domain provided: {organization_domain}")
-
+        domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
+        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
         if cached_response is not None:
             logger.info(f"Cache hit for organization domain: {organization_domain}")
             return cached_response

-        resolve_url = 'https://
+        resolve_url = 'https://enrichlayer.com/api/v2/company/resolve'
         params = {'domain': organization_domain}
         logger.debug(f"Making request to Proxycurl to resolve domain with params: {params}")

@@ -390,14 +477,15 @@
                    else:
                        standardized_url = company_url

-                   profile_url = 'https://
+                   profile_url = 'https://enrichlayer.com/api/v2/company'
                    try:
-
+                       profile_params = _build_company_profile_params(standardized_url, profile_flags)
+                       async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
                            logger.debug(f"Received profile response status: {profile_response.status}")
                            if profile_response.status == 200:
                                result = await profile_response.json()
                                transformed_result = transform_company_data(result)
-                               cache_output("enrich_organization_info_from_proxycurl",
+                               cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
                                logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                                return transformed_result
                            elif profile_response.status == 429:
@@ -423,7 +511,7 @@
                elif response.status == 404:
                    msg = "Item not found"
                    logger.warning(msg)
-                   cache_output("enrich_organization_info_from_proxycurl",
+                   cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
                    return {}
                else:
                    error_text = await response.text()
@@ -456,9 +544,10 @@ async def enrich_job_info_from_proxycurl(
     """
     logger.info("Entering enrich_job_info_from_proxycurl")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}

     HEADERS = {
         'Authorization': f'Bearer {API_KEY}',
@@ -476,7 +565,7 @@
         return cached_response

     params = {'url': job_url}
-    api_endpoint = 'https://
+    api_endpoint = 'https://enrichlayer.com/api/v2/job'
     logger.debug(f"Making request to Proxycurl for job info with params: {params}")

     async with aiohttp.ClientSession() as session:
@@ -529,9 +618,10 @@ async def search_recent_job_changes(
     """
     logger.info("Entering search_recent_job_changes")

-
-
-
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
         return []

     HEADERS = {
@@ -539,7 +629,7 @@
         'Content-Type': 'application/json'
     }

-    url = 'https://
+    url = 'https://enrichlayer.com/api/v2/search/person'
    results = []
    page = 1
    per_page = min(max_items_to_return, 100)
@@ -582,7 +672,7 @@
                    error_text = await response.text()
                    logger.error(f"Error while searching recent job changes: {error_text}")
                    break
-        except Exception
+        except Exception:
            logger.exception("Exception occurred while searching recent job changes.")
            break

@@ -628,11 +718,11 @@ async def find_matching_job_posting_proxy_curl(
    logger.debug(f"Google search query: {query}")

    # First Google search attempt
-    results = await
+    results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
    if not isinstance(results, list) or len(results) == 0:
        logger.info("No results found. Attempting fallback query without optional keywords.")
        query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str}'
-        results = await
+        results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
        if not isinstance(results, list) or len(results) == 0:
            logger.info("No job postings found in fallback search either.")
            return job_posting_links
@@ -698,32 +788,34 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
        # Checks for None, empty string, or string with only whitespace
        return value is None or (isinstance(value, str) and not value.strip())

-    # Email
+    # Email - use first personal email if input is empty
    if is_empty(input_user_properties.get("email")):
-
+        personal_emails = person_data.get("personal_emails")
+        if isinstance(personal_emails, list) and personal_emails:
+            input_user_properties["email"] = personal_emails[0]

    # Phone
    if is_empty(input_user_properties.get("phone")):
        input_user_properties["phone"] = person_data.get("contact", {}).get("sanitized_phone", "")

    # Full name
-    if
+    if person_data.get("full_name"):
        input_user_properties["full_name"] = person_data["full_name"]

    # First name
-    if
+    if person_data.get("first_name"):
        input_user_properties["first_name"] = person_data["first_name"]

    # Last name
-    if
+    if person_data.get("last_name"):
        input_user_properties["last_name"] = person_data["last_name"]

    # Occupation -> job_title
-    if
+    if person_data.get("occupation"):
        input_user_properties["job_title"] = person_data["occupation"]

    # Headline
-    if
+    if person_data.get("headline"):
        input_user_properties["headline"] = person_data["headline"]

    # Summary
@@ -734,8 +826,8 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
    experiences = person_data.get("experiences", [])
    if experiences:
        # Current role data
-
-
+
+        input_user_properties["organization_name"] = experiences[0].get("company", "")

        org_url = experiences[0].get("company_linkedin_profile_url", "")
        if org_url and is_empty(input_user_properties.get("organization_linkedin_url")):
@@ -752,22 +844,39 @@ def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -
        if is_empty(input_user_properties.get("previous_organization_name")):
            input_user_properties["previous_organization_name"] = previous_org.get("company", "")

-    # Combine city/state if available (and if lead_location is empty)
+    # Combine city/state if available (and if lead_location is empty); avoid literal "None"
    if is_empty(input_user_properties.get("lead_location")):
-
-
-
+        city = person_data.get("city")
+        state = person_data.get("state")
+        parts = []
+        for value in (city, state):
+            if value is None:
+                continue
+            s = str(value).strip()
+            if not s or s.lower() == "none":
+                continue
+            parts.append(s)
+        if parts:
+            input_user_properties["lead_location"] = ", ".join(parts)
+
+    # LinkedIn Followers Count
+    if is_empty(input_user_properties.get("linkedin_follower_count")):
+        input_user_properties["linkedin_follower_count"] = person_data.get("follower_count", 0)

    return input_user_properties


+
 async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_config: Optional[List[Dict]] = None) -> dict:
    """
    Enriches the user info (input_user_properties) with data from Proxycurl.
+    If the user_linkedin_url is determined to be a proxy (acw* and length > 10),
+    we skip calling enrich_person_info_from_proxycurl, keep the input as-is,
+    and only perform the organization enrichment logic.

    Returns:
-        dict: Updated input_user_properties with enriched data
-
+        dict: Updated input_user_properties with enriched data or
+        with an error field if something goes wrong.
    """
    logger.info("Entering enrich_user_info_with_proxy_curl")

@@ -781,7 +890,61 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

    logger.debug(f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'")

-    #
+    # ---------------------------------------------------------------
+    # 1) Detect if the LinkedIn URL is a "proxy" URL (acw + length > 10)
+    # ---------------------------------------------------------------
+    def is_proxy_linkedin_url(url: str) -> bool:
+        """
+        Checks if the LinkedIn URL has an /in/<profile_id> path
+        that starts with 'acw' and has length > 10, indicating a proxy.
+        """
+        match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
+        if match:
+            profile_id = match.group(1)
+            if profile_id.startswith("acw") and len(profile_id) > 10:
+                return True
+        return False
+
+    if is_proxy_linkedin_url(linkedin_url):
+        logger.info("The LinkedIn URL appears to be a proxy URL. Skipping user data enrichment from Proxycurl.")
+        # We do NOT call enrich_person_info_from_proxycurl for user data.
+        # We just set linkedin_url_match = False and enrich organization info if possible:
+        input_user_properties["linkedin_url_match"] = False
+
+        # Attempt organization enrichment if we have an organization_linkedin_url:
+        company_data = {}
+        if input_user_properties.get("organization_linkedin_url"):
+            company_data = await enrich_organization_info_from_proxycurl(
+                organization_linkedin_url=input_user_properties["organization_linkedin_url"],
+                tool_config=tool_config
+            )
+            if company_data and not company_data.get("error"):
+                if company_data.get("organization_linkedin_url"):
+                    input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
+                if company_data.get("organization_name"):
+                    input_user_properties["organization_name"] = company_data.get("organization_name", "")
+                input_user_properties["organization_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["company_size"] = str(
+                    company_data.get("company_size_on_linkedin", "")
+                )
+                input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+                input_user_properties["industry"] = company_data.get("organization_industry", "")
+                input_user_properties["organization_revenue"] = ""
+
+            # Always clean & store any returned org info:
+            additional_props = input_user_properties.get("additional_properties") or {}
+            company_data = cleanup_properties(company_data)
+            additional_props["pc_company_data"] = json.dumps(company_data)
+            input_user_properties["additional_properties"] = additional_props
+
+        logger.info("Returning after skipping user enrichment for proxy URL.")
+        return input_user_properties
+
+    # ----------------------------------------------------------------
+    # 2) If not proxy, proceed with normal user enrichment logic
+    # ----------------------------------------------------------------
    if linkedin_url or email:
        user_data = await enrich_person_info_from_proxycurl(
            linkedin_url=linkedin_url,
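Note: a quick sanity sketch of the proxy-URL rule added above (the profile ids are invented; is_proxy_linkedin_url is nested inside the function, so this is inlined here for illustration only):

    assert is_proxy_linkedin_url("https://www.linkedin.com/in/acwb1c2d3e4f5")  # 'acw' prefix, id longer than 10 chars
    assert not is_proxy_linkedin_url("https://www.linkedin.com/in/janedoe")    # ordinary public slug
    assert not is_proxy_linkedin_url("https://www.linkedin.com/in/acw1234")    # 'acw' prefix but id too short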
@@ -858,7 +1021,9 @@
        input_user_properties["linkedin_url_match"] = False
        return input_user_properties

-    #
+    # ------------------------------------------------------------------
+    # 3) If user data was found, sanitize & fill user properties
+    # ------------------------------------------------------------------
    url_pattern = re.compile(r'(https?://[^\s]+)', re.IGNORECASE)

    def sanitize_urls_in_data(data):
@@ -900,11 +1065,13 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

    input_user_properties = fill_in_missing_properties(input_user_properties, person_data)

-    #
+    # ------------------------------------------------------------------
+    # 4) Attempt organization enrichment if we have an org LinkedIn URL
+    # ------------------------------------------------------------------
    company_data = {}
    if input_user_properties.get("organization_linkedin_url"):
        company_data = await enrich_organization_info_from_proxycurl(
-            organization_linkedin_url=input_user_properties
+            organization_linkedin_url=input_user_properties["organization_linkedin_url"],
            tool_config=tool_config
        )
        if company_data and not company_data.get("error"):
@@ -912,8 +1079,15 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con
                input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
            if company_data.get("organization_name"):
                input_user_properties["organization_name"] = company_data.get("organization_name", "")
-            input_user_properties["organization_size"] =
-
+            input_user_properties["organization_size"] = str(
+                company_data.get("company_size_on_linkedin", "")
+            )
+            input_user_properties["company_size"] = str(
+                company_data.get("company_size_on_linkedin", "")
+            )
+            input_user_properties["company_size_list"] = company_data.get("company_size", "")
+            input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
+            input_user_properties["industry"] = company_data.get("organization_industry", "")
            input_user_properties["organization_revenue"] = ""

        person_data = cleanup_properties(person_data)
@@ -925,3 +1099,130 @@ async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_con

    logger.info("Enrichment of user info with Proxycurl complete.")
    return input_user_properties
+
+
+
+
+
+@assistant_tool
+async def find_leads_by_job_openings_proxy_curl(
+    query_params: Dict[str, Any],
+    hiring_manager_roles: List[str],
+    tool_config: Optional[List[Dict]] = None,
+) -> List[Dict]:
+    """Search LinkedIn job postings using Proxycurl and find hiring manager leads.
+
+    Args:
+        query_params: Dictionary of parameters to Proxycurl job search API. The
+            key ``job_title`` is required. Other keys like ``location`` may also
+            be supplied.
+        hiring_manager_roles: List of job titles to lookup at the company for
+            potential hiring managers.
+        tool_config: Optional configuration containing Proxycurl credentials.
+
+    Returns:
+        A list of lead dictionaries with normalized keys such as
+        ``first_name``, ``last_name``, ``user_linkedin_url``,
+        ``organization_name``, and ``organization_linkedin_url``.
+    """
+    logger.info("Entering find_leads_by_job_openings_proxy_curl")
+
+    if not isinstance(query_params, dict) or not query_params.get("job_title"):
+        logger.warning("query_params must include 'job_title'")
+        return []
+
+    try:
+        API_KEY = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    job_search_url = "https://enrichlayer.com/api/v2/company/job"
+    leads: List[Dict] = []
+
+    # ------------------------------------------------------------------
+    # 1) Look up job openings
+    # ------------------------------------------------------------------
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(job_search_url, headers=headers, params=query_params) as resp:
+                if resp.status == 200:
+                    job_result = await resp.json()
+                    jobs = job_result.get("results") or job_result.get("jobs") or []
+                elif resp.status == 429:
+                    logger.warning("Rate limit exceeded on job search")
+                    await asyncio.sleep(30)
+                    return []
+                else:
+                    error_text = await resp.text()
+                    logger.error("Job search error %s: %s", resp.status, error_text)
+                    return []
+    except Exception:
+        logger.exception("Exception while searching jobs on Proxycurl")
+        return []
+
+    # ------------------------------------------------------------------
+    # 2) For each job, find leads for specified hiring manager roles
+    # ------------------------------------------------------------------
+    for job in jobs:
+        company = job.get("company", {}) if isinstance(job, dict) else {}
+        company_name = company.get("name", "")
+        company_url = company.get("url", "")
+        if not company_name:
+            continue
+
+        for role in hiring_manager_roles:
+            employee_params = {
+                "url": company_url,
+                "role_search": role,
+                "employment_status": "current",
+                "page_size": 1,
+            }
+            employees = []
+            try:
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(
+                        "https://enrichlayer.com/api/v2/company/employees",
+                        headers=headers,
+                        params=employee_params,
+                    ) as e_resp:
+                        if e_resp.status == 200:
+                            data = await e_resp.json()
+                            employees = data.get("employees") or data.get("profiles") or []
+                        elif e_resp.status == 429:
+                            logger.warning("Rate limit exceeded while fetching employees")
+                            await asyncio.sleep(30)
+                            continue
+            except Exception:
+                logger.exception("Exception while fetching employees from Proxycurl")
+                continue
+
+            for emp in employees:
+                profile_url = emp.get("linkedin_profile_url") or emp.get("profile_url")
+                if not profile_url:
+                    continue
+                person = await enrich_person_info_from_proxycurl(
+                    linkedin_url=profile_url, tool_config=tool_config
+                )
+                if not person or person.get("error"):
+                    continue
+                lead = {
+                    "first_name": person.get("first_name", ""),
+                    "last_name": person.get("last_name", ""),
+                    "full_name": person.get("full_name", ""),
+                    "user_linkedin_url": profile_url,
+                    "job_title": person.get("occupation", role),
+                    "organization_name": company_name,
+                    "organization_linkedin_url": company_url,
+                }
+                cleaned = cleanup_properties(lead)
+                if cleaned:
+                    leads.append(cleaned)
+
+    logger.info("Returning %d leads from Proxycurl job search", len(leads))
+    return leads