dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dhisana/schemas/common.py +10 -1
  2. dhisana/schemas/sales.py +203 -22
  3. dhisana/utils/add_mapping.py +0 -2
  4. dhisana/utils/apollo_tools.py +739 -119
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/check_email_validity_tools.py +35 -18
  7. dhisana/utils/check_for_intent_signal.py +1 -2
  8. dhisana/utils/check_linkedin_url_validity.py +34 -8
  9. dhisana/utils/clay_tools.py +3 -2
  10. dhisana/utils/clean_properties.py +1 -4
  11. dhisana/utils/compose_salesnav_query.py +0 -1
  12. dhisana/utils/compose_search_query.py +7 -3
  13. dhisana/utils/composite_tools.py +0 -1
  14. dhisana/utils/dataframe_tools.py +2 -2
  15. dhisana/utils/email_body_utils.py +72 -0
  16. dhisana/utils/email_provider.py +174 -35
  17. dhisana/utils/enrich_lead_information.py +183 -53
  18. dhisana/utils/fetch_openai_config.py +129 -0
  19. dhisana/utils/field_validators.py +1 -1
  20. dhisana/utils/g2_tools.py +0 -1
  21. dhisana/utils/generate_content.py +0 -1
  22. dhisana/utils/generate_email.py +68 -23
  23. dhisana/utils/generate_email_response.py +294 -46
  24. dhisana/utils/generate_flow.py +0 -1
  25. dhisana/utils/generate_linkedin_connect_message.py +9 -2
  26. dhisana/utils/generate_linkedin_response_message.py +137 -66
  27. dhisana/utils/generate_structured_output_internal.py +317 -164
  28. dhisana/utils/google_custom_search.py +150 -44
  29. dhisana/utils/google_oauth_tools.py +721 -0
  30. dhisana/utils/google_workspace_tools.py +278 -54
  31. dhisana/utils/hubspot_clearbit.py +3 -1
  32. dhisana/utils/hubspot_crm_tools.py +718 -272
  33. dhisana/utils/instantly_tools.py +3 -1
  34. dhisana/utils/lusha_tools.py +10 -7
  35. dhisana/utils/mailgun_tools.py +150 -0
  36. dhisana/utils/microsoft365_tools.py +447 -0
  37. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  38. dhisana/utils/openai_helpers.py +8 -6
  39. dhisana/utils/parse_linkedin_messages_txt.py +1 -3
  40. dhisana/utils/profile.py +37 -0
  41. dhisana/utils/proxy_curl_tools.py +377 -76
  42. dhisana/utils/proxycurl_search_leads.py +426 -0
  43. dhisana/utils/research_lead.py +3 -3
  44. dhisana/utils/sales_navigator_crawler.py +1 -6
  45. dhisana/utils/salesforce_crm_tools.py +323 -50
  46. dhisana/utils/search_router.py +131 -0
  47. dhisana/utils/search_router_jobs.py +51 -0
  48. dhisana/utils/sendgrid_tools.py +126 -91
  49. dhisana/utils/serarch_router_local_business.py +75 -0
  50. dhisana/utils/serpapi_additional_tools.py +290 -0
  51. dhisana/utils/serpapi_google_jobs.py +117 -0
  52. dhisana/utils/serpapi_google_search.py +188 -0
  53. dhisana/utils/serpapi_local_business_search.py +129 -0
  54. dhisana/utils/serpapi_search_tools.py +360 -432
  55. dhisana/utils/serperdev_google_jobs.py +125 -0
  56. dhisana/utils/serperdev_local_business.py +154 -0
  57. dhisana/utils/serperdev_search.py +233 -0
  58. dhisana/utils/smtp_email_tools.py +178 -18
  59. dhisana/utils/test_connect.py +1603 -130
  60. dhisana/utils/trasform_json.py +3 -3
  61. dhisana/utils/web_download_parse_tools.py +0 -1
  62. dhisana/utils/zoominfo_tools.py +2 -3
  63. dhisana/workflow/test.py +1 -1
  64. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
  65. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  66. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  67. dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
  68. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  69. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/serpapi_search_tools.py

@@ -1,385 +1,150 @@
  import json
- import os
  import re
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List, Optional, Set
  from urllib.parse import urlparse
+ import urllib.parse
  import aiohttp
  from bs4 import BeautifulSoup
  import urllib
+ from pydantic import BaseModel

- from dhisana.utils.assistant_tool_tag import assistant_tool
- from dhisana.utils.cache_output_tools import cache_output, retrieve_output
- from dhisana.utils.web_download_parse_tools import fetch_html_content, get_html_content_from_url
+ from dhisana.utils.serperdev_search import search_google_serper
+ from dhisana.utils.generate_structured_output_internal import (
+     get_structured_output_internal,
+ )

  import logging
+
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ from dhisana.utils.search_router import search_google_with_tools
+ from dhisana.utils.assistant_tool_tag import assistant_tool

- def get_serp_api_access_token(tool_config: Optional[List[Dict]] = None) -> str:
-     """
-     Retrieves the SERPAPI_KEY access token from the provided tool configuration.
-
-     Args:
-         tool_config (list): A list of dictionaries containing the tool configuration.
-             Each dictionary should have a "name" key and a "configuration" key,
-             where "configuration" is a list of dictionaries containing "name" and "value" keys.
-
-     Returns:
-         str: The SERPAPI_KEY access token.
-
-     Raises:
-         ValueError: If the access token is not found in the tool configuration or environment variable.
-     """
-     logger.info("Entering get_serp_api_access_token")
-     SERPAPI_KEY = None
-
-     if tool_config:
-         logger.debug(f"Tool config provided: {tool_config}")
-         serpapi_config = next(
-             (item for item in tool_config if item.get("name") == "serpapi"), None
-         )
-         if serpapi_config:
-             config_map = {
-                 item["name"]: item["value"]
-                 for item in serpapi_config.get("configuration", [])
-                 if item
-             }
-             SERPAPI_KEY = config_map.get("apiKey")
-         else:
-             logger.warning("No 'serpapi' config item found in tool_config.")
-     else:
-         logger.debug("No tool_config provided or it's None.")
-
-     SERPAPI_KEY = SERPAPI_KEY or os.getenv("SERPAPI_KEY")
-     if not SERPAPI_KEY:
-         logger.error("SERPAPI_KEY not found in configuration or environment.")
-         raise ValueError("SERPAPI_KEY access token not found in tool_config or environment variable")
-
-     logger.info("Retrieved SERPAPI_KEY successfully.")
-     return SERPAPI_KEY
-
-
- @assistant_tool
- async def search_google(
-     query: str,
-     number_of_results: int = 10,
-     offset: int = 0,
-     tool_config: Optional[List[Dict]] = None,
-     as_oq: Optional[str] = None # <-- NEW PARAM for optional keywords
- ) -> List[str]:
-     """
-     Search Google using SERP API, supporting pagination and an explicit 'offset'
-     parameter to start from a specific result index.
-     Now also supports 'as_oq' for optional query terms in SERP API.
-
-     Parameters:
-     - query (str): The search query.
-     - number_of_results (int): The total number of results to return. Default is 10.
-     - offset (int): The starting index for the first result returned (Google pagination).
-     - tool_config (Optional[List[Dict]]): Configuration containing SERP API token, etc.
-     - as_oq (Optional[str]): Optional query terms for SerpAPI (if supported).
-
-     Returns:
-     - List[str]: A list of organic search results, each serialized as a JSON string.
-     """
-     logger.info("Entering search_google")
-     if not query:
-         logger.warning("Empty query string provided.")
-         return []
-
-     # Use 'as_oq' in the cache key too, so different optional terms don't conflict
-     cache_key = f"{query}_{number_of_results}_{offset}_{as_oq or ''}"
-     cached_response = retrieve_output("search_google_serp", cache_key)
-     if cached_response is not None:
-         logger.info("Cache hit for search_google.")
-         return cached_response
-
-     SERPAPI_KEY = get_serp_api_access_token(tool_config)
-     url = "https://serpapi.com/search"
-
-     page_size = 100
-     all_results: List[Dict[str, Any]] = []
-     start_index = offset
-
-     logger.debug(f"Requesting up to {number_of_results} results for '{query}' starting at offset {offset}.")
-
-     async with aiohttp.ClientSession() as session:
-         while len(all_results) < number_of_results:
-             to_fetch = min(page_size, number_of_results - len(all_results))
-             params = {
-                 "q": query,
-                 "num": to_fetch,
-                 "start": start_index,
-                 "api_key": SERPAPI_KEY,
-                 "engine": "google",
-                 "location": "United States"
-             }
-
-             # If we have optional terms, add them
-             if as_oq:
-                 params["as_oq"] = as_oq
-
-             logger.debug(f"SERP API GET request with params: {params}")
-
-             try:
-                 async with session.get(url, params=params) as response:
-                     logger.debug(f"Received response status: {response.status}")
-                     if response.status != 200:
-                         try:
-                             error_content = await response.json()
-                         except Exception:
-                             error_content = await response.text()
-                         logger.warning(f"Non-200 response from SERP API: {error_content}")
-                         return [json.dumps({"error": error_content})]
-
-                     result = await response.json()
-             except Exception as e:
-                 logger.exception("Exception during SERP API request.")
-                 return [json.dumps({"error": str(e)})]
-
-             organic_results = result.get('organic_results', [])
-             if not organic_results:
-                 logger.debug("No more organic results returned; stopping.")
-                 break
-
-             all_results.extend(organic_results)
-             start_index += to_fetch
-
-             if len(all_results) >= number_of_results:
-                 break
-
-     all_results = all_results[:number_of_results]
-     logger.info(f"Found {len(all_results)} results for query '{query}'.")
-
-     serialized_results = [json.dumps(item) for item in all_results]
-     cache_output("search_google_serp", cache_key, serialized_results)
-     return serialized_results
-
-
- @assistant_tool
- async def search_google_maps(
-     query: str,
-     number_of_results: int = 3,
-     tool_config: Optional[List[Dict]] = None
- ) -> List[str]:
-     """
-     Search Google Maps using SERP API and return the results as an array of serialized JSON strings.
-
-     Parameters:
-     - query (str): The search query.
-     - number_of_results (int): The number of results to return.
-     """
-     logger.info("Entering search_google_maps")
-     if not query:
-         logger.warning("Empty query string provided for search_google_maps.")
-         return []
-
-     SERPAPI_KEY = get_serp_api_access_token(tool_config)
-     params = {
-         "q": query,
-         "num": number_of_results,
-         "api_key": SERPAPI_KEY,
-         "engine": "google_maps"
-     }
-     url = "https://serpapi.com/search"
-
-     logger.debug(f"Searching Google Maps with params: {params}")
-     try:
-         async with aiohttp.ClientSession() as session:
-             async with session.get(url, params=params) as response:
-                 logger.debug(f"Received status: {response.status}")
-                 result = await response.json()
-                 if response.status != 200:
-                     logger.warning(f"Non-200 response from SERP API: {result}")
-                     return [json.dumps({"error": result})]
-
-                 serialized_results = [json.dumps(item) for item in result.get('local_results', [])]
-                 logger.info(f"Returning {len(serialized_results)} map results.")
-                 return serialized_results
-     except Exception as e:
-         logger.exception("Exception during search_google_maps request.")
-         return [json.dumps({"error": str(e)})]
-
+ from dhisana.utils.web_download_parse_tools import fetch_html_content

- @assistant_tool
- async def search_google_news(
-     query: str,
-     number_of_results: int = 3,
-     tool_config: Optional[List[Dict]] = None
- ) -> List[str]:
-     """
-     Search Google News using SERP API and return the results as an array of serialized JSON strings.
-
-     Parameters:
-     - query (str): The search query.
-     - number_of_results (int): The number of results to return.
-     """
-     logger.info("Entering search_google_news")
-     if not query:
-         logger.warning("Empty query string provided for search_google_news.")
-         return []

-     SERPAPI_KEY = get_serp_api_access_token(tool_config)
-     params = {
-         "q": query,
-         "num": number_of_results,
-         "api_key": SERPAPI_KEY,
-         "engine": "google_news"
-     }
-     url = "https://serpapi.com/search"
+ class LeadSearchResult(BaseModel):
+     first_name: str = ""
+     last_name: str = ""
+     full_name: str = ""
+     job_title: str = ""
+     linkedin_follower_count: int = 0
+     lead_location: str = ""
+     summary_about_lead: str = ""
+     user_linkedin_url: str = ""

-     logger.debug(f"Searching Google News with params: {params}")
-     try:
-         async with aiohttp.ClientSession() as session:
-             async with session.get(url, params=params) as response:
-                 logger.debug(f"Received status: {response.status}")
-                 result = await response.json()
-                 if response.status != 200:
-                     logger.warning(f"Non-200 response from SERP API: {result}")
-                     return [json.dumps({"error": result})]
-
-                 serialized_results = [json.dumps(item) for item in result.get('news_results', [])]
-                 logger.info(f"Returning {len(serialized_results)} news results.")
-                 return serialized_results
-     except Exception as e:
-         logger.exception("Exception during search_google_news request.")
-         return [json.dumps({"error": str(e)})]

+ class LinkedinCandidateChoice(BaseModel):
+     chosen_link: str = ""
+     confidence: float = 0.0
+     reasoning: str = ""

- @assistant_tool
- async def search_job_postings(
-     query: str,
-     number_of_results: int,
-     tool_config: Optional[List[Dict]] = None
- ) -> List[str]:
-     """
-     Search for job postings using SERP API and return the results as an array of serialized JSON strings.
-
-     Parameters:
-     - query (str): The search query.
-     - number_of_results (int): The number of results to return.
-     """
-     logger.info("Entering search_job_postings")
-     if not query:
-         logger.warning("Empty query string provided for search_job_postings.")
-         return []

-     SERPAPI_KEY = get_serp_api_access_token(tool_config)
-     params = {
-         "q": query,
-         "num": number_of_results,
-         "api_key": SERPAPI_KEY,
-         "engine": "google_jobs"
-     }
-     url = "https://serpapi.com/search"
+ async def get_structured_output(text: str, tool_config: Optional[List[Dict]] = None) -> LeadSearchResult:
+     """Parse text snippet into ``LeadSearchResult`` using OpenAI."""

-     logger.debug(f"Searching Google Jobs with params: {params}")
-     try:
-         async with aiohttp.ClientSession() as session:
-             async with session.get(url, params=params) as response:
-                 logger.debug(f"Received status: {response.status}")
-                 result = await response.json()
-                 if response.status != 200:
-                     logger.warning(f"Non-200 response from SERP API: {result}")
-                     return [json.dumps({"error": result})]
-
-                 serialized_results = [json.dumps(item) for item in result.get('jobs_results', [])]
-                 logger.info(f"Returning {len(serialized_results)} job posting results.")
-                 return serialized_results
-     except Exception as e:
-         logger.exception("Exception during search_job_postings request.")
-         return [json.dumps({"error": str(e)})]
+     prompt = (
+         "Extract lead details from the text below.\n"
+         "If follower counts are mentioned, convert values like '1.5k+ followers' to an integer (e.g. 1500).\n"
+         f"Return JSON matching this schema:\n{json.dumps(LeadSearchResult.model_json_schema(), indent=2)}\n\n"
+         f"Text:\n{text}"
+     )
+     result, status = await get_structured_output_internal(
+         prompt, LeadSearchResult, model = "gpt-5.1-chat", tool_config=tool_config
+     )
+     if status != "SUCCESS" or result is None:
+         return LeadSearchResult()
+     return result


  @assistant_tool
- async def search_google_images(
-     query: str,
-     number_of_results: int,
-     tool_config: Optional[List[Dict]] = None
- ) -> List[str]:
-     """
-     Search Google Images using SERP API and return the results as an array of serialized JSON strings.
-
-     Parameters:
-     - query (str): The search query.
-     - number_of_results (int): The number of results to return.
-     """
-     logger.info("Entering search_google_images")
-     if not query:
-         logger.warning("Empty query string provided for search_google_images.")
-         return []
-
-     SERPAPI_KEY = get_serp_api_access_token(tool_config)
-     params = {
-         "q": query,
-         "num": number_of_results,
-         "api_key": SERPAPI_KEY,
-         "engine": "google_images"
-     }
-     url = "https://serpapi.com/search"
-
-     logger.debug(f"Searching Google Images with params: {params}")
-     try:
-         async with aiohttp.ClientSession() as session:
-             async with session.get(url, params=params) as response:
-                 logger.debug(f"Received status: {response.status}")
-                 result = await response.json()
-                 if response.status != 200:
-                     logger.warning(f"Non-200 response from SERP API: {result}")
-                     return [json.dumps({"error": result})]
-
-                 serialized_results = [json.dumps(item) for item in result.get('images_results', [])]
-                 logger.info(f"Returning {len(serialized_results)} image results.")
-                 return serialized_results
-     except Exception as e:
-         logger.exception("Exception during search_google_images request.")
-         return [json.dumps({"error": str(e)})]
-
+ async def find_user_linkedin_url_with_serper(
+     user_linkedin_url: str,
+     tool_config: Optional[List[Dict]] = None,
+ ) -> Optional[Dict]:
+     """Search Google via Serper.dev for ``user_linkedin_url`` and parse lead details."""

- @assistant_tool
- async def search_google_videos(
-     query: str,
-     number_of_results: int,
-     tool_config: Optional[List[Dict]] = None
- ) -> List[str]:
-     """
-     Search Google Videos using SERP API and return the results as an array of serialized JSON strings.
-
-     Parameters:
-     - query (str): The search query.
-     - number_of_results (int): The number of results to return.
-     """
-     logger.info("Entering search_google_videos")
-     if not query:
-         logger.warning("Empty query string provided for search_google_videos.")
-         return []
+     if not user_linkedin_url:
+         return None

-     SERPAPI_KEY = get_serp_api_access_token(tool_config)
-     params = {
-         "q": query,
-         "num": number_of_results,
-         "api_key": SERPAPI_KEY,
-         "engine": "google_videos"
-     }
-     url = "https://serpapi.com/search"
+     normalized_input = extract_user_linkedin_page(user_linkedin_url)
+     results = await search_google_serper(user_linkedin_url, 10, tool_config=tool_config)
+     for item_json in results:
+         try:
+             item = json.loads(item_json)
+         except Exception:
+             continue
+         link = item.get("link", "")
+         if not link:
+             continue
+         if extract_user_linkedin_page(link) == normalized_input:
+             text = " ".join(
+                 [item.get("title", ""), item.get("subtitle", ""), item.get("snippet", "")]
+             ).strip()
+             structured = await get_structured_output(text, tool_config=tool_config)
+             structured.user_linkedin_url = normalized_input
+             return json.loads(structured.model_dump_json())
+     return None
+
+
+ async def pick_best_linkedin_candidate_with_llm(
+     email: str,
+     user_name: str,
+     user_title: str,
+     user_location: str,
+     user_company: str,
+     candidates: List[Dict],
+     tool_config: Optional[List[Dict]] = None,
+ ) -> Optional[LinkedinCandidateChoice]:
+     """Ask the LLM to assess candidate LinkedIn URLs and pick the best match."""
+
+     if not candidates:
+         return None
+
+     candidates_sorted = candidates[-3:]
+     candidate_lines = []
+     for idx, candidate in enumerate(candidates_sorted, start=1):
+         candidate_lines.append(
+             "\n".join(
+                 [
+                     f"Candidate {idx}:",
+                     f" Link: {candidate.get('link', '')}",
+                     f" Title: {candidate.get('title', '')}",
+                     f" Snippet: {candidate.get('snippet', '')}",
+                     f" Subtitle: {candidate.get('subtitle', '')}",
+                     f" Query: {candidate.get('query', '')}",
+                 ]
+             )
+         )

-     logger.debug(f"Searching Google Videos with params: {params}")
-     try:
-         async with aiohttp.ClientSession() as session:
-             async with session.get(url, params=params) as response:
-                 logger.debug(f"Received status: {response.status}")
-                 result = await response.json()
-                 if response.status != 200:
-                     logger.warning(f"Non-200 response from SERP API: {result}")
-                     return [json.dumps({"error": result})]
-
-                 serialized_results = [json.dumps(item) for item in result.get('video_results', [])]
-                 logger.info(f"Returning {len(serialized_results)} video results.")
-                 return serialized_results
-     except Exception as e:
-         logger.exception("Exception during search_google_videos request.")
-         return [json.dumps({"error": str(e)})]
+     prompt = (
+         "You are validating LinkedIn profile matches for a lead enrichment workflow.\n"
+         "Given the lead context and candidate search results, pick the most likely LinkedIn profile.\n"
+         "If no candidate seems appropriate, return an empty link and confidence 0.\n"
+         "Consider whether the email, name, company, title, or location aligns with the candidate.\n"
+         "Lead context:\n"
+         f"- Email: {email or 'unknown'}\n"
+         f"- Name: {user_name or 'unknown'}\n"
+         f"- Title: {user_title or 'unknown'}\n"
+         f"- Company: {user_company or 'unknown'}\n"
+         f"- Location: {user_location or 'unknown'}\n\n"
+         "Candidates:\n"
+         f"{chr(10).join(candidate_lines)}\n\n"
+         "Return JSON with fields: chosen_link (string), confidence (0-1 float), reasoning (short string)."
+     )
+
+     result, status = await get_structured_output_internal(
+         prompt,
+         LinkedinCandidateChoice,
+         model="gpt-5.1-chat",
+         tool_config=tool_config,
+     )
+
+     if status != "SUCCESS" or result is None:
+         return None
+
+     return result


  @assistant_tool
@@ -389,14 +154,7 @@ async def get_company_domain_from_google_search(
      tool_config: Optional[List[Dict]] = None
  ) -> str:
      """
-     Tries to find the company domain from the company name using Google search.
-
-     Args:
-         company_name (str): The name of the company to search for.
-         location (str, optional): A location to include in the query.
-
-     Returns:
-         str: The domain of the company's official website if found, otherwise an empty string.
+     Tries to find the company domain from the company name using Google (SerpAPI or Serper.dev).
      """
      logger.info("Entering get_company_domain_from_google_search")

@@ -405,22 +163,21 @@ async def get_company_domain_from_google_search(
          logger.debug("Invalid or excluded company_name provided.")
          return ""

-     exclude_company_names = ["linkedin", "wikipedia", "facebook", "instagram", "twitter", "youtube", "netflix", "zoominfo", "reditt"]
      query = f"\"{company_name}\" official website"
      if location:
          query = f"\"{company_name}\" official website, {location}"

      try:
          logger.debug(f"Performing search with query: {query}")
-         result = await search_google(query, 1, tool_config=tool_config)
+         result = await search_google_with_tools(query, 1, tool_config=tool_config)
          if not isinstance(result, list) or len(result) == 0:
              logger.debug("No results for first attempt, retrying with fallback query.")
              query = f"{company_name} official website"
-             result = await search_google(query, 1, tool_config=tool_config)
+             result = await search_google_with_tools(query, 1, tool_config=tool_config)
              if not isinstance(result, list) or len(result) == 0:
                  logger.debug("No results from fallback query either.")
                  return ''
-     except Exception as e:
+     except Exception:
          logger.exception("Exception during get_company_domain_from_google_search.")
          return ''

@@ -472,16 +229,6 @@ async def get_signal_strength(
      """
      Find how strong a match for the keywords in search is by checking
      how many search results contain all desired keywords in the snippet.
-
-     Args:
-         domain_to_search (str): The domain to search inside.
-         keywords (List[str]): The keywords to search for.
-         in_title (List[str]): Keywords that must appear in the title.
-         not_in_title (List[str]): Keywords that must not appear in the title.
-         negative_keywords (List[str]): Keywords to exclude from results.
-
-     Returns:
-         int: A strength score on a scale of 0 to 5.
      """
      logger.info("Entering get_signal_strength")

@@ -508,8 +255,8 @@ async def get_signal_strength(

      logger.debug(f"Performing get_signal_strength search with query: {final_query}")
      try:
-         results = await search_google(final_query, 5, tool_config=tool_config)
-     except Exception as e:
+         results = await search_google_with_tools(final_query, 5, tool_config=tool_config)
+     except Exception:
          logger.exception("Exception occurred while searching for signal strength.")
          return 0

@@ -518,9 +265,9 @@ async def get_signal_strength(
          return 0

      score = 0
-     for result in results:
+     for result_item in results:
          try:
-             result_json = json.loads(result)
+             result_json = json.loads(result_item)
              snippet_text = result_json.get('snippet', '').lower()
              if all(kw.lower() in snippet_text for kw in keywords):
                  logger.debug(f"Found match in snippet: {snippet_text[:60]}...")
@@ -544,8 +291,8 @@ def extract_user_linkedin_page(url: str) -> str:
      if not url:
          return ""

-     normalized_url = re.sub(r"(https?://)?([\w\-]+\.)?linkedin\.com", "https://www.linkedin.com", url)
-     match = re.match(r"https://www.linkedin.com/in/([\w\-]+)", normalized_url)
+     normalized_url = re.sub(r"^(https?://)?([\w\-]+\.)?linkedin\.com", "https://www.linkedin.com", url)
+     match = re.match(r"https://www\.linkedin\.com/in/([^/?#]+)", normalized_url)
      if match:
          page = f"https://www.linkedin.com/in/{match.group(1)}"
          logger.debug(f"Extracted user LinkedIn page: {page}")
@@ -567,16 +314,6 @@ async def find_user_linkedin_url_google(
  ) -> str:
      """
      Find the LinkedIn URL for a user based on their name, title, location, and company.
-
-     Args:
-         user_name (str): The name of the user.
-         user_title (str): The title of the user.
-         user_location (str): The location of the user.
-         user_company (str): The company of the user.
-         use_strict_check (bool): Whether to use a strict single query or a series of relaxed queries.
-
-     Returns:
-         str: The LinkedIn URL if found, otherwise an empty string.
      """
      logger.info("Entering find_user_linkedin_url_google")

@@ -596,14 +333,14 @@ async def find_user_linkedin_url_google(
          f'site:linkedin.com/in "{user_name}" intitle:"{user_name}"'
      ]

-     async with aiohttp.ClientSession() as session: # Not strictly necessary here, but kept for parallel structure
+     async with aiohttp.ClientSession() as session:
          for query in queries:
              if not query.strip():
                  continue
              logger.debug(f"Searching with query: {query}")
              try:
-                 results = await search_google(query.strip(), 1, tool_config=tool_config)
-             except Exception as e:
+                 results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
+             except Exception:
                  logger.exception("Error searching for LinkedIn user URL.")
                  continue

@@ -632,6 +369,221 @@ async def find_user_linkedin_url_google(
      return ""


+ @assistant_tool
+ async def find_user_linkedin_url_by_email_google(
+     email: str,
+     user_name: str = "",
+     user_title: str = "",
+     user_location: str = "",
+     user_company: str = "",
+     tool_config: Optional[List[Dict]] = None,
+ ) -> Optional[Dict[str, Any]]:
+     """
+     Find the LinkedIn URL for a user based primarily on their email address.
+
+     Additional profile hints (name, title, location, company) improve query precision
+     when supplied. Returns a dict with the best LinkedIn URL, LLM confidence score,
+     and short reasoning when a match clears the confidence threshold; otherwise ``None``.
+     """
+     logger.info("Entering find_user_linkedin_url_by_email_google")
+
+     if not email:
+         logger.warning("No email provided.")
+         return None
+
+     normalized_email = email.strip().lower()
+     email_local_part = normalized_email.split("@")[0] if "@" in normalized_email else normalized_email
+     email_local_humanized = re.sub(r"[._-]+", " ", email_local_part).strip()
+
+     queries: List[str] = []
+
+     def add_query(query: str) -> None:
+         query = query.strip()
+         if query and query not in queries:
+             queries.append(query)
+
+     def add_query_parts(*parts: str) -> None:
+         tokens = [part.strip() for part in parts if part and part.strip()]
+         if not tokens:
+             return
+         add_query(" ".join(tokens))
+
+     enriched_terms = []
+     if user_name:
+         enriched_terms.append(f'"{user_name}"')
+     if user_company:
+         enriched_terms.append(f'"{user_company}"')
+     if user_title:
+         enriched_terms.append(f'"{user_title}"')
+     if user_location:
+         enriched_terms.append(f'"{user_location}"')
+     base_hint = " ".join(enriched_terms)
+
+     # Prioritise the direct email search variants before broader fallbacks.
+     add_query_parts(normalized_email, "linkedin.com/in", base_hint)
+     add_query_parts(normalized_email, "linkedin.com", base_hint)
+     add_query_parts(normalized_email, "linkedin", base_hint)
+     add_query_parts(normalized_email, base_hint)
+     add_query(f'"{normalized_email}" "linkedin.com/in" {base_hint}')
+     add_query(f'"{normalized_email}" "linkedin.com" {base_hint}')
+     add_query(f'"{normalized_email}" linkedin {base_hint}')
+
+     if email_local_part and email_local_part != normalized_email:
+         add_query_parts(email_local_part, "linkedin.com/in", base_hint)
+         add_query_parts(email_local_part, "linkedin.com", base_hint)
+         add_query_parts(email_local_part, "linkedin", base_hint)
+         add_query(f'"{email_local_part}" "linkedin.com/in" {base_hint}')
+         add_query(f'"{email_local_part}" "linkedin.com" {base_hint}')
+
+     if email_local_humanized and email_local_humanized not in {email_local_part, normalized_email}:
+         add_query_parts(email_local_humanized, "linkedin", base_hint)
+         add_query(f'"{email_local_humanized}" linkedin {base_hint}')
+
+     if normalized_email:
+         add_query(f'site:linkedin.com/in "{normalized_email}" {base_hint}')
+
+     if email_local_part:
+         add_query(f'site:linkedin.com/in "{email_local_part}" {base_hint}')
+
+     if email_local_humanized and email_local_humanized != email_local_part:
+         add_query(f'site:linkedin.com/in "{email_local_humanized}" {base_hint}')
+
+     if base_hint:
+         lookup_hint = user_name or email_local_humanized or email_local_part or normalized_email
+         add_query(
+             f'site:linkedin.com/in "{normalized_email}" {base_hint} '
+             f'intitle:"{lookup_hint}" -intitle:"profiles"'
+         )
+         if email_local_humanized:
+             add_query(
+                 f'site:linkedin.com/in "{email_local_humanized}" {base_hint} '
+                 f'intitle:"{lookup_hint}" -intitle:"profiles"'
+             )
+
+     candidate_records: List[Dict[str, str]] = []
+     seen_links: Set[str] = set()
+     best_llm_choice: Optional[LinkedinCandidateChoice] = None
+     best_llm_link: str = ""
+     HIGH_CONFIDENCE_THRESHOLD = 0.8
+     MIN_CONFIDENCE_THRESHOLD = 0.75
+
+     async def evaluate_with_llm() -> Optional[LinkedinCandidateChoice]:
+         nonlocal best_llm_choice, best_llm_link
+
+         llm_choice = await pick_best_linkedin_candidate_with_llm(
+             email=email,
+             user_name=user_name,
+             user_title=user_title,
+             user_location=user_location,
+             user_company=user_company,
+             candidates=candidate_records,
+             tool_config=tool_config,
+         )
+
+         if not llm_choice or not llm_choice.chosen_link:
+             return None
+
+         chosen_link = extract_user_linkedin_page(llm_choice.chosen_link)
+         if not chosen_link:
+             return None
+
+         llm_choice.chosen_link = chosen_link
+
+         if best_llm_choice is None or llm_choice.confidence > best_llm_choice.confidence:
+             best_llm_choice = llm_choice
+             best_llm_link = chosen_link
+             logger.debug(
+                 "LLM updated best candidate: %s (confidence %.2f) reason: %s",
+                 chosen_link,
+                 llm_choice.confidence,
+                 llm_choice.reasoning,
+             )
+
+         if llm_choice.confidence >= HIGH_CONFIDENCE_THRESHOLD:
+             logger.info(
+                 "Returning LinkedIn user page by email via LLM scoring: %s (confidence %.2f)",
+                 chosen_link,
+                 llm_choice.confidence,
+             )
+             return llm_choice
+
+         return None
+
+     async with aiohttp.ClientSession() as session:
+         for query in queries:
+             query = query.strip()
+             if not query:
+                 continue
+             logger.debug(f"Searching with query: {query}")
+
+             try:
+                 results = await search_google_with_tools(query, 5, tool_config=tool_config)
+             except Exception:
+                 logger.exception("Error searching for LinkedIn user URL by email.")
+                 continue
+
+             if not isinstance(results, list) or len(results) == 0:
+                 logger.debug("No results for this query, moving to next.")
+                 continue
+
+             for result_item in results:
+                 try:
+                     result_json = json.loads(result_item)
+                 except (json.JSONDecodeError, IndexError):
+                     logger.debug("Failed to parse JSON from the search result.")
+                     continue
+
+                 link = result_json.get('link', '')
+                 if not link:
+                     continue
+
+                 parsed_url = urlparse(link)
+                 if 'linkedin.com/in' in (parsed_url.netloc + parsed_url.path):
+                     link = extract_user_linkedin_page(link)
+                     if not link or link in seen_links:
+                         continue
+
+                     title = result_json.get('title', '')
+                     snippet = result_json.get('snippet', '')
+                     subtitle = result_json.get('subtitle', '')
+
+                     candidate_records.append(
+                         {
+                             "link": link,
+                             "title": title,
+                             "snippet": snippet,
+                             "subtitle": subtitle,
+                             "query": query,
+                         }
+                     )
+                     if len(candidate_records) > 6:
+                         candidate_records.pop(0)
+                     seen_links.add(link)
+
+                     high_conf_choice = await evaluate_with_llm()
+                     if high_conf_choice:
+                         return {
+                             "linkedin_url": high_conf_choice.chosen_link,
+                             "confidence": high_conf_choice.confidence,
+                             "reasoning": high_conf_choice.reasoning,
+                         }
+
+     if best_llm_choice and best_llm_link and best_llm_choice.confidence >= MIN_CONFIDENCE_THRESHOLD:
+         logger.info(
+             "Returning LinkedIn user page by email via LLM scoring (best overall): %s (confidence %.2f)",
+             best_llm_link,
+             best_llm_choice.confidence,
+         )
+         return {
+             "linkedin_url": best_llm_link,
+             "confidence": best_llm_choice.confidence,
+             "reasoning": best_llm_choice.reasoning,
+         }
+
+     logger.info("No matching LinkedIn user page found using email queries.")
+     return None
+
+
  @assistant_tool
  async def find_user_linkedin_url_by_job_title_google(
      user_title: str,
@@ -641,14 +593,6 @@ async def find_user_linkedin_url_by_job_title_google(
  ) -> str:
      """
      Find the LinkedIn URL for a user based on their job_title, location, and company.
-
-     Args:
-         user_title (str): The title of the user.
-         user_location (str): The location of the user.
-         user_company (str): The company of the user.
-
-     Returns:
-         str: The LinkedIn URL if found, otherwise an empty string.
      """
      logger.info("Entering find_user_linkedin_url_by_job_title_google")

@@ -656,15 +600,15 @@ async def find_user_linkedin_url_by_job_title_google(
          f'site:linkedin.com/in "{user_company}" AND "{user_title}" -intitle:"profiles" ',
      ]

-     async with aiohttp.ClientSession() as session:
+     async with aiohttp.ClientSession() as session:
          for query in queries:
              if not query.strip():
                  continue
              logger.debug(f"Searching with query: {query}")

              try:
-                 results = await search_google(query.strip(), 1, tool_config=tool_config)
-             except Exception as e:
+                 results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
+             except Exception:
                  logger.exception("Error searching for LinkedIn URL by job title.")
                  continue

@@ -701,14 +645,6 @@ async def find_user_linkedin_url_by_google_search(
  ) -> List[str]:
      """
      Find LinkedIn user URLs based on provided Google search queries.
-
-     Args:
-         queries (List[str]): A list of Google search queries.
-         number_of_results (int): Number of results to return from each query (default is 5).
-         tool_config (Optional[List[Dict]]): Optional configuration for the SERP API.
-
-     Returns:
-         List[str]: A list of matching LinkedIn user URLs found, or an empty list if none.
      """
      logger.info("Entering find_user_linkedin_url_by_google_search")
      found_urls = []
@@ -719,8 +655,8 @@
          logger.debug(f"Searching with query: {query}")

          try:
-             results = await search_google(query.strip(), number_of_results, tool_config=tool_config)
-         except Exception as e:
+             results = await search_google_with_tools(query.strip(), number_of_results, tool_config=tool_config)
+         except Exception:
              logger.exception("Error searching for LinkedIn URL using Google search.")
              continue

@@ -780,14 +716,6 @@ async def find_organization_linkedin_url_with_google_search(
  ) -> str:
      """
      Find the LinkedIn URL for a company based on its name and optional location using Google search.
-
-     Args:
-         company_name (str): The name of the company.
-         company_location (str, optional): The location of the company.
-         use_strict_check (bool): Whether to use stricter or multiple queries.
-
-     Returns:
-         str: The LinkedIn URL if found, otherwise an empty string.
      """
      logger.info("Entering find_organization_linkedin_url_with_google_search")

@@ -796,7 +724,7 @@
          return ""

      if use_strict_check:
-         queries = [f'site:linkedin.com/company "{company_name}" {company_domain} -intitle:"jobs" ']
+         queries = [f'site:linkedin.com/company "{company_name}" {company_domain} ']
      else:
          if company_location:
              queries = [
@@ -817,8 +745,8 @@

          logger.debug(f"Searching with query: {query}")
          try:
-             results = await search_google(query.strip(), 1, tool_config=tool_config)
-         except Exception as e:
+             results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
+         except Exception:
              logger.exception("Error searching for organization LinkedIn URL.")
              continue

@@ -871,7 +799,7 @@ async def get_external_links(url: str) -> List[str]:
                  else:
                      logger.warning(f"Non-200 status ({response.status}) while fetching external links.")
                      return []
-     except Exception as e:
+     except Exception:
          logger.exception("Exception occurred while fetching external links.")
          return []

@@ -883,7 +811,7 @@ async def get_resolved_linkedin_links(url: str) -> List[str]:
      logger.debug(f"Entering get_resolved_linkedin_links for URL: {url}")
      try:
          content = await fetch_html_content(url)
-     except Exception as e:
+     except Exception:
          logger.exception("Exception occurred while fetching HTML content.")
          return []

@@ -907,7 +835,7 @@ async def get_company_website_from_linkedin_url(linkedin_url: str) -> str:

      try:
          links = await get_external_links(linkedin_url)
-     except Exception as e:
+     except Exception:
          logger.exception("Exception occurred while getting external links for LinkedIn URL.")
          return ""

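
For orientation, here is a minimal usage sketch (not part of the diff) of the new email-based lookup added to dhisana/utils/serpapi_search_tools.py in this release. The function name, parameters, return shape, and the 0.8/0.75 confidence thresholds come from the diff above; the "serperdev" entry name and "apiKey" field in tool_config are assumptions modeled on the {name, configuration} layout parsed by the removed get_serp_api_access_token helper, not something this diff confirms.

import asyncio

from dhisana.utils.serpapi_search_tools import find_user_linkedin_url_by_email_google

# Assumed provider entry: the "serperdev"/"apiKey" keys mirror the
# {name, configuration} shape the removed get_serp_api_access_token
# helper parsed, and are hypothetical here.
tool_config = [
    {
        "name": "serperdev",
        "configuration": [{"name": "apiKey", "value": "YOUR_SERPER_KEY"}],
    }
]

async def main() -> None:
    match = await find_user_linkedin_url_by_email_google(
        email="jane.doe@acme.com",   # required; hypothetical lead
        user_name="Jane Doe",        # optional hints sharpen the queries
        user_company="Acme",
        tool_config=tool_config,
    )
    # Per the diff, a dict {"linkedin_url", "confidence", "reasoning"} is
    # returned only when the LLM score clears the 0.8 (per-candidate) or
    # 0.75 (best overall) threshold; otherwise the tool returns None.
    print(match)

if __name__ == "__main__":
    asyncio.run(main())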