dhisana 0.0.1.dev243__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. dhisana/__init__.py +1 -0
  2. dhisana/cli/__init__.py +1 -0
  3. dhisana/cli/cli.py +20 -0
  4. dhisana/cli/datasets.py +27 -0
  5. dhisana/cli/models.py +26 -0
  6. dhisana/cli/predictions.py +20 -0
  7. dhisana/schemas/__init__.py +1 -0
  8. dhisana/schemas/common.py +399 -0
  9. dhisana/schemas/sales.py +965 -0
  10. dhisana/ui/__init__.py +1 -0
  11. dhisana/ui/components.py +472 -0
  12. dhisana/utils/__init__.py +1 -0
  13. dhisana/utils/add_mapping.py +352 -0
  14. dhisana/utils/agent_tools.py +51 -0
  15. dhisana/utils/apollo_tools.py +1597 -0
  16. dhisana/utils/assistant_tool_tag.py +4 -0
  17. dhisana/utils/built_with_api_tools.py +282 -0
  18. dhisana/utils/cache_output_tools.py +98 -0
  19. dhisana/utils/cache_output_tools_local.py +78 -0
  20. dhisana/utils/check_email_validity_tools.py +717 -0
  21. dhisana/utils/check_for_intent_signal.py +107 -0
  22. dhisana/utils/check_linkedin_url_validity.py +209 -0
  23. dhisana/utils/clay_tools.py +43 -0
  24. dhisana/utils/clean_properties.py +135 -0
  25. dhisana/utils/company_utils.py +60 -0
  26. dhisana/utils/compose_salesnav_query.py +259 -0
  27. dhisana/utils/compose_search_query.py +759 -0
  28. dhisana/utils/compose_three_step_workflow.py +234 -0
  29. dhisana/utils/composite_tools.py +137 -0
  30. dhisana/utils/dataframe_tools.py +237 -0
  31. dhisana/utils/domain_parser.py +45 -0
  32. dhisana/utils/email_body_utils.py +72 -0
  33. dhisana/utils/email_parse_helpers.py +132 -0
  34. dhisana/utils/email_provider.py +375 -0
  35. dhisana/utils/enrich_lead_information.py +933 -0
  36. dhisana/utils/extract_email_content_for_llm.py +101 -0
  37. dhisana/utils/fetch_openai_config.py +129 -0
  38. dhisana/utils/field_validators.py +426 -0
  39. dhisana/utils/g2_tools.py +104 -0
  40. dhisana/utils/generate_content.py +41 -0
  41. dhisana/utils/generate_custom_message.py +271 -0
  42. dhisana/utils/generate_email.py +278 -0
  43. dhisana/utils/generate_email_response.py +465 -0
  44. dhisana/utils/generate_flow.py +102 -0
  45. dhisana/utils/generate_leads_salesnav.py +303 -0
  46. dhisana/utils/generate_linkedin_connect_message.py +224 -0
  47. dhisana/utils/generate_linkedin_response_message.py +317 -0
  48. dhisana/utils/generate_structured_output_internal.py +462 -0
  49. dhisana/utils/google_custom_search.py +267 -0
  50. dhisana/utils/google_oauth_tools.py +727 -0
  51. dhisana/utils/google_workspace_tools.py +1294 -0
  52. dhisana/utils/hubspot_clearbit.py +96 -0
  53. dhisana/utils/hubspot_crm_tools.py +2440 -0
  54. dhisana/utils/instantly_tools.py +149 -0
  55. dhisana/utils/linkedin_crawler.py +168 -0
  56. dhisana/utils/lusha_tools.py +333 -0
  57. dhisana/utils/mailgun_tools.py +156 -0
  58. dhisana/utils/mailreach_tools.py +123 -0
  59. dhisana/utils/microsoft365_tools.py +455 -0
  60. dhisana/utils/openai_assistant_and_file_utils.py +267 -0
  61. dhisana/utils/openai_helpers.py +977 -0
  62. dhisana/utils/openapi_spec_to_tools.py +45 -0
  63. dhisana/utils/openapi_tool/__init__.py +1 -0
  64. dhisana/utils/openapi_tool/api_models.py +633 -0
  65. dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
  66. dhisana/utils/openapi_tool/openapi_tool.py +319 -0
  67. dhisana/utils/parse_linkedin_messages_txt.py +100 -0
  68. dhisana/utils/profile.py +37 -0
  69. dhisana/utils/proxy_curl_tools.py +1226 -0
  70. dhisana/utils/proxycurl_search_leads.py +426 -0
  71. dhisana/utils/python_function_to_tools.py +83 -0
  72. dhisana/utils/research_lead.py +176 -0
  73. dhisana/utils/sales_navigator_crawler.py +1103 -0
  74. dhisana/utils/salesforce_crm_tools.py +477 -0
  75. dhisana/utils/search_router.py +131 -0
  76. dhisana/utils/search_router_jobs.py +51 -0
  77. dhisana/utils/sendgrid_tools.py +162 -0
  78. dhisana/utils/serarch_router_local_business.py +75 -0
  79. dhisana/utils/serpapi_additional_tools.py +290 -0
  80. dhisana/utils/serpapi_google_jobs.py +117 -0
  81. dhisana/utils/serpapi_google_search.py +188 -0
  82. dhisana/utils/serpapi_local_business_search.py +129 -0
  83. dhisana/utils/serpapi_search_tools.py +852 -0
  84. dhisana/utils/serperdev_google_jobs.py +125 -0
  85. dhisana/utils/serperdev_local_business.py +154 -0
  86. dhisana/utils/serperdev_search.py +233 -0
  87. dhisana/utils/smtp_email_tools.py +582 -0
  88. dhisana/utils/test_connect.py +2087 -0
  89. dhisana/utils/trasform_json.py +173 -0
  90. dhisana/utils/web_download_parse_tools.py +189 -0
  91. dhisana/utils/workflow_code_model.py +5 -0
  92. dhisana/utils/zoominfo_tools.py +357 -0
  93. dhisana/workflow/__init__.py +1 -0
  94. dhisana/workflow/agent.py +18 -0
  95. dhisana/workflow/flow.py +44 -0
  96. dhisana/workflow/task.py +43 -0
  97. dhisana/workflow/test.py +90 -0
  98. dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
  99. dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
  100. dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
  101. dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
  102. dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
@@ -0,0 +1,107 @@
1
+ import datetime
2
+ import logging
3
+ from typing import Any, Dict, List, Optional, cast
4
+
5
+ from pydantic import BaseModel
6
+ from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
7
+ from dhisana.utils.compose_search_query import (
8
+ get_search_results_for_insights
9
+ )
10
+
11
+ logger = logging.getLogger(__name__)
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+
15
class IntentSignalScoring(BaseModel):
    """Structured response schema the LLM must return when scoring a lead for an intent signal."""
    # Confidence score between 0 and 5 for the requested intent signal.
    score_based_on_intent_signal: int
    # Short justification, mainly populated when a high score is given.
    reasoning_for_score_being_high: str
    # Summary of the lead (role, experience, tenure, etc.) and their current company.
    summary_of_lead_and_company: str
19
+
20
+
21
async def check_for_intent_signal(
    lead: Dict[str, Any],
    signal_to_look_for_in_plan_english: str,
    intent_signal_type: str,
    add_search_results: Optional[bool] = False,
    tool_config: Optional[List[Dict[str, Any]]] = None
) -> int:
    """
    Evaluate a 'lead' for a specific intent signal and return an integer score from 0–5.

    Args:
        lead: Lead record (e.g. with 'full_name'). Mutated in place: the keys
            'qualification_score', 'qualification_reason' and
            'summary_about_lead' are set from the LLM response.
        signal_to_look_for_in_plan_english: Plain-English description of the
            signal the user wants to qualify on.
        intent_signal_type: Type/category label of the intent signal.
        add_search_results: When True, fetch online search results for the lead
            and include them in the scoring prompt.
        tool_config: Optional tool configuration forwarded to helper calls.

    Returns:
        The integer score (0–5) produced by the LLM.

    Raises:
        Exception: If the structured-output LLM call does not succeed.
    """
    logger.info("check_for_intent_signal called with lead=%s, intent_signal_type=%s", lead.get("full_name"), intent_signal_type)

    search_results_text = ""
    if add_search_results:
        logger.info("Fetching search results for lead='%s' with signal='%s'", lead.get("full_name"), intent_signal_type)
        search_results = await get_search_results_for_insights(
            lead=lead,
            english_description=signal_to_look_for_in_plan_english,
            intent_signal_type=intent_signal_type,
            tool_config=tool_config
        )
        logger.info("Received search results count: %d", len(search_results))

        for item in search_results:
            query_str = item.get("query", "")
            results_str = item.get("results", "")
            logger.info("Search query: %s", query_str)
            logger.info("Search results snippet: %s", results_str[:100])  # Show partial snippet
            search_results_text += f"Query: {query_str}\nResults: {results_str}\n\n"

    # Fix: removed a stray `datetime.datetime.now().isoformat()` call whose
    # result was discarded — it had no effect.
    # Fix: prompt previously referenced 'reasoning_for_score_bing_high', which
    # mismatched the IntentSignalScoring field name and could mislead the LLM;
    # also corrected prompt typos (qualifying/Think/location).
    user_prompt = f"""
    Hi AI Assistant,
    You are an expert in scoring leads based on intent signals.
    You have the following lead and user requirements to provide a qualifying lead score between 0 and 5
    based on the intent signal the user is looking for.
    Do the following step by step:
    1. Think about the summary of the lead and the company lead is working for.
    2. Create a summary of the search results obtained.
    3. Think about the signal user is looking for to qualify and score the lead.
    4. Use the lead information, summary of search results and signal user is looking for to score the lead.
    5. Go back and check if the score makes sense. Score between 0-5 based on the confidence of the signal.

    Lead Data:
    {lead}

    Description of the signal user is looking for:
    {signal_to_look_for_in_plan_english}

    Following is some search results I found online. Use them if they are relevant for scoring:
    {search_results_text}


    Return your answer in valid JSON with the key 'score_based_on_intent_signal'.
    Make sure it is an integer between 0 and 5.
    Add small reasoning_for_score_being_high describing why you gave the score score_based_on_intent_signal as high if you are giving high score.
    in summary_of_lead_and_company field provide a summary of the lead (like role, experience, tenure, location) and details about the company lead is working for currently.
    """
    logger.info("Constructed user prompt for LLM.")

    response_any, status = await get_structured_output_internal(
        user_prompt,
        IntentSignalScoring,
        effort="low",
        tool_config=tool_config
    )
    logger.info("Intent signal scoring call completed with status=%s", status)

    if status != "SUCCESS" or response_any is None:
        logger.error("Failed to generate an intent signal score from the LLM.")
        raise Exception("Failed to generate an intent signal score from the LLM.")

    response = cast(IntentSignalScoring, response_any)
    score = response.score_based_on_intent_signal
    reasoning = response.reasoning_for_score_being_high[:100]  # Show partial if very long
    lead["qualification_score"] = score
    lead["qualification_reason"] = response.reasoning_for_score_being_high
    lead["summary_about_lead"] = response.summary_of_lead_and_company

    logger.info(
        "Lead '%s' scored %d for intent signal '%s'. Reason partial: %s",
        lead.get("full_name", "Unknown"),
        score,
        intent_signal_type,
        reasoning
    )
    return score
@@ -0,0 +1,209 @@
1
+ import re
2
+ from typing import Dict, List, Optional, Any
3
+ from pydantic import BaseModel
4
+ from dhisana.utils.apollo_tools import enrich_person_info_from_apollo
5
+ from dhisana.utils.assistant_tool_tag import assistant_tool
6
+ from dhisana.utils.proxy_curl_tools import enrich_person_info_from_proxycurl
7
+
8
+ # --------------------------------------------------------------------------------
9
+ # 1. Data Model
10
+ # --------------------------------------------------------------------------------
11
+
12
class LeadLinkedInMatch(BaseModel):
    """Flags describing how well a lead's fields match an enriched LinkedIn profile."""
    first_name_matched: bool = False
    last_name_matched: bool = False
    # True when the enrichment provider returned any data for the URL.
    linkedin_url_valid: bool = False
    title_matched: bool = False
    location_matched: bool = False
18
+
19
+ # --------------------------------------------------------------------------------
20
+ # 2. Helper: Compare Single Field
21
+ # --------------------------------------------------------------------------------
22
+
23
def compare_field(
    lead_properties: Dict[str, Any],
    person_data: Dict[str, Any],
    lead_key: str,
    person_key: str
) -> bool:
    """Compare one lead field against the corresponding enriched-person field.

    A missing or falsy value on the lead side counts as a match by default.
    Two strings are compared case-insensitively after trimming whitespace;
    any other types are compared with plain equality.
    """
    expected = lead_properties.get(lead_key, "")
    if not expected:
        # Lead has no value to compare against; treat as matched.
        return True

    actual = person_data.get(person_key, "")

    both_strings = isinstance(expected, str) and isinstance(actual, str)
    if both_strings:
        return expected.strip().lower() == actual.strip().lower()

    return actual == expected
40
+
41
+ # --------------------------------------------------------------------------------
42
+ # 3. Apollo Validation Function
43
+ # --------------------------------------------------------------------------------
44
+
45
@assistant_tool
async def validate_linkedin_url_with_apollo(
    lead_properties: Dict[str, Any],
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, bool]:
    """
    Validates the LinkedIn URL and user information using the Apollo API.

    Args:
        lead_properties (dict): Contains keys like:
            'first_name', 'last_name', 'job_title', 'lead_location', 'user_linkedin_url'.
        tool_config (Optional[List[Dict]]): Contains configuration for the Apollo tool.

    Returns:
        Dict[str, bool]: A dictionary with matching status:
            {
                "first_name_matched": bool,
                "last_name_matched": bool,
                "linkedin_url_valid": bool,
                "title_matched": bool,
                "location_matched": bool
            }
    """
    result = LeadLinkedInMatch()
    url = lead_properties.get("user_linkedin_url", "")

    enriched = await enrich_person_info_from_apollo(
        linkedin_url=url,
        tool_config=tool_config
    )
    if not enriched:
        # Apollo returned nothing: every flag stays at its default (False).
        return result.model_dump()

    person = enriched.get("person", {})

    # (result attribute, lead-side key, Apollo person key)
    field_pairs = [
        ("first_name_matched", "first_name", "first_name"),
        ("last_name_matched", "last_name", "last_name"),
        ("title_matched", "job_title", "title"),
        ("location_matched", "lead_location", "location"),
    ]
    for attr, lead_key, person_key in field_pairs:
        setattr(result, attr, compare_field(lead_properties, person, lead_key, person_key))

    # Receiving any payload at all means the URL resolved to a real profile.
    result.linkedin_url_valid = True

    return result.model_dump()
91
+
92
@assistant_tool
async def validate_linkedin_url_with_proxy_curl(
    lead_properties: Dict[str, Any],
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, bool]:
    """
    Validates the LinkedIn URL and user information using the Proxy Curl API.

    Args:
        lead_properties (dict): Contains keys like:
            'first_name', 'last_name', 'job_title', 'lead_location', 'user_linkedin_url'.
        tool_config (Optional[List[Dict]]): Contains configuration for the Apollo tool.

    Returns:
        Dict[str, bool]: A dictionary with matching status:
            {
                "first_name_matched": bool,
                "last_name_matched": bool,
                "linkedin_url_valid": bool,
                "title_matched": bool,
                "location_matched": bool
            }
    """
    result = LeadLinkedInMatch()
    url = lead_properties.get("user_linkedin_url", "")

    enriched = await enrich_person_info_from_proxycurl(
        linkedin_url=url,
        tool_config=tool_config
    )
    if not enriched:
        # Proxycurl returned nothing: every flag stays at its default (False).
        return result.model_dump()

    # Proxycurl returns the person record at the top level (no 'person' wrapper).
    person = enriched

    # (result attribute, lead-side key, Proxycurl key)
    # NOTE: location comparison is intentionally skipped for Proxycurl, so
    # 'location_matched' keeps its default of False.
    field_pairs = [
        ("first_name_matched", "first_name", "first_name"),
        ("last_name_matched", "last_name", "last_name"),
        ("title_matched", "job_title", "occupation"),
    ]
    for attr, lead_key, person_key in field_pairs:
        setattr(result, attr, compare_field(lead_properties, person, lead_key, person_key))

    # Receiving any payload at all means the URL resolved to a real profile.
    result.linkedin_url_valid = True

    return result.model_dump()
138
+
139
+ # --------------------------------------------------------------------------------
140
+ # 4. High-Level Validation Router
141
+ # --------------------------------------------------------------------------------
142
+
143
# Tool names permitted for LinkedIn URL validation.
# NOTE(review): "zoominfo" is allowed here but has no entry in the function map
# below, so it can never actually be selected — confirm whether a zoominfo
# validator is planned.
ALLOWED_CHECK_LINKEDIN_TOOLS = ["apollo", "proxycurl", "zoominfo"]
# Maps a tool name found in tool_config to its validation coroutine.
LINKEDIN_VALIDATE_TOOL_NAME_TO_FUNCTION_MAP = {
    "apollo": validate_linkedin_url_with_apollo,
    "proxycurl": validate_linkedin_url_with_proxy_curl
}
148
+
149
def is_proxy_linkedin_url(url: str) -> bool:
    """Return True when a LinkedIn URL looks "proxy-like".

    A URL qualifies when its /in/<profile_id> segment starts with 'acw'
    and the profile id is longer than 10 characters.
    """
    found = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
    if not found:
        return False

    profile_id = found.group(1).strip()
    return profile_id.startswith("acw") and len(profile_id) > 10
160
+
161
@assistant_tool
async def check_linkedin_url_validity(
    lead_properties: Dict[str, Any],
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, bool]:
    """
    Validates LinkedIn URL (and related fields) by choosing the appropriate tool
    from the tool_config. If the LinkedIn URL is detected as a "proxy" URL,
    we skip calling any external tool and directly return 'linkedin_url_valid' = True.

    Args:
        lead_properties (dict): Lead info (e.g. first_name, last_name, job_title,
                                lead_location, user_linkedin_url).
        tool_config (Optional[List[Dict]]): Configuration to identify which tool is available.

    Returns:
        Dict[str, bool]: Standardized response from the chosen validation function.

    Raises:
        ValueError: If no tool configuration or no suitable validation tool is found.
    """
    if not tool_config:
        raise ValueError("No tool configuration found.")

    # Proxy-style URLs skip external validation entirely: report the URL as
    # valid and leave every other match flag at its default of False.
    if is_proxy_linkedin_url(lead_properties.get("user_linkedin_url", "")):
        proxy_result = LeadLinkedInMatch()
        proxy_result.linkedin_url_valid = True
        return proxy_result.model_dump()

    # Otherwise pick the first configured tool that is both allowed and has
    # an implementation in the function map.
    validator = None
    for entry in tool_config:
        entry_name = entry.get("name")
        is_known = entry_name in LINKEDIN_VALIDATE_TOOL_NAME_TO_FUNCTION_MAP
        if is_known and entry_name in ALLOWED_CHECK_LINKEDIN_TOOLS:
            validator = LINKEDIN_VALIDATE_TOOL_NAME_TO_FUNCTION_MAP[entry_name]
            break

    if not validator:
        raise ValueError("No suitable LinkedIn validation tool found in tool_config.")

    return await validator(lead_properties, tool_config)
@@ -0,0 +1,43 @@
1
+ import aiohttp
2
+ import logging
3
+ from typing import Optional
4
+ from dhisana.utils.assistant_tool_tag import assistant_tool
5
+
6
@assistant_tool
async def push_to_clay_table(
    data: dict,
    webhook: Optional[str] = None,
    api_key: Optional[str] = None,
):
    """
    Push data to the Clay webhook.

    Parameters:
    - **data** (*dict*): Data to send to the webhook.
    - **webhook** (*str*, optional): The webhook URL.
    - **api_key** (*str*, optional): The authentication token.

    Returns:
    - **dict**: Response message or error.
    """
    # Guard clauses: both the auth token and the target URL are required.
    if not api_key:
        return {
            'error': "Clay integration is not configured. Please configure the connection to Clay in Integrations."
        }
    if not webhook:
        return {'error': "Webhook URL not provided"}

    request_headers = {
        "Content-Type": "application/json",
        # Clay authenticates webhook posts via this custom header.
        "x-clay-webhook-auth": api_key,
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(webhook, headers=request_headers, json=data) as response:
            body = await response.text()
            if response.status != 200:
                logging.warning(f"push_to_clay_table failed: {body}")
                return {'error': body}
            return {'message': body}
@@ -0,0 +1,135 @@
1
+ from typing import Any, Dict, List
2
+ import copy
3
+ from typing import Any, Dict, List, Optional
4
+
5
+
6
+
7
def remove_empty(data: Any) -> Any:
    """
    Recursively strip null-like values from JSON/dict data.

    Drops None, empty/whitespace-only strings, the string 'null' (any case),
    and empty lists/dicts. Collapses to None when nothing survives, so callers
    can tell "fully empty" apart from "has content".
    """
    if isinstance(data, dict):
        pruned: Dict[str, Any] = {}
        for key, value in data.items():
            kept = remove_empty(value)
            if kept is not None:
                pruned[key] = kept
        # An all-empty mapping collapses to None.
        return pruned or None

    if isinstance(data, list):
        pruned_items: List[Any] = [
            kept for kept in map(remove_empty, data) if kept is not None
        ]
        # An all-empty sequence collapses to None.
        return pruned_items or None

    # Scalar case: filter None and null-ish strings; pass everything else
    # through untouched (including 0 and False, which are meaningful values).
    if data is None:
        return None
    if isinstance(data, str) and (not data.strip() or data.lower() == "null"):
        return None
    return data
45
+
46
+
47
def cleanup_properties(properties: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a cleaned copy of *properties* with null-like values removed.

    Always yields a dict: when everything is pruned away an empty dict is
    returned rather than None. The input mapping is not mutated.
    """
    result = remove_empty(properties)
    if result is None:
        return {}
    return result
53
+
54
+
55
+
56
+
57
def cleanup_email_context(user_properties: Dict[str, Any]) -> Dict[str, Any]:
    """
    Produce a sanitized deep copy of *user_properties* for email composition.

    - Recursively drops null/empty values and ID/GUID-like keys.
    - Blanks external_known_data.external_openai_vector_store_id when present,
      so the vector-store reference never leaks into the prompt context.
    The original mapping is left untouched.
    """
    sanitized = copy.deepcopy(user_properties)

    known_data = sanitized.get('external_known_data')
    if isinstance(known_data, dict) and 'external_openai_vector_store_id' in known_data:
        known_data['external_openai_vector_store_id'] = None

    pruned = _remove_empty_and_ids(sanitized)
    return pruned if pruned is not None else {}
72
+
73
def _remove_empty_and_ids(data: Any) -> Optional[Any]:
    """
    Recursive cleaner backing cleanup_email_context.

    Removes None, empty/'null' strings, empty containers, and any dict key
    that _is_id_key flags as identifier-like. Collapses to None when nothing
    remains at a given level.
    """
    if isinstance(data, dict):
        kept_dict: Dict[str, Any] = {}
        for key, value in data.items():
            if _is_id_key(key):
                # Identifier-like keys are stripped wholesale.
                continue
            candidate = _remove_empty_and_ids(value)
            if not _is_empty_value(candidate):
                kept_dict[key] = candidate
        return kept_dict or None

    if isinstance(data, list):
        kept_list: List[Any] = [
            candidate
            for candidate in (_remove_empty_and_ids(item) for item in data)
            if not _is_empty_value(candidate)
        ]
        return kept_list or None

    # Scalar leaf: keep unless it is considered empty.
    return None if _is_empty_value(data) else data
104
+
105
+ def _is_id_key(key: str) -> bool:
106
+ """
107
+ Identify if a key is ID-like by checking if 'id' or 'guid' appears in its name (case-insensitive),
108
+ or if it ends with _id, _ids, or _by.
109
+ """
110
+ key_lower = key.lower()
111
+ return (
112
+ 'id' in key_lower
113
+ or 'guid' in key_lower
114
+ or key_lower.endswith('_id')
115
+ or key_lower.endswith('_ids')
116
+ or key_lower.endswith('_by')
117
+ )
118
+
119
+ def _is_empty_value(value: Any) -> bool:
120
+ """
121
+ Determine if a value is considered "empty" for removal.
122
+ This includes:
123
+ - None
124
+ - Empty string
125
+ - String "null" (case-insensitive)
126
+ - Empty list or dict
127
+ """
128
+ if value is None:
129
+ return True
130
+ if isinstance(value, str):
131
+ if not value.strip() or value.lower() == "null":
132
+ return True
133
+ if isinstance(value, (list, dict)) and len(value) == 0:
134
+ return True
135
+ return False
@@ -0,0 +1,60 @@
1
+ import re
2
def normalize_company_name(name: str) -> str:
    """
    Normalize a company name while preserving the letter case of surviving
    characters.

    Returns '' when the input is invalid, is a common placeholder (e.g.
    'none', 'na'), or contains a disallowed keyword such as 'freelance' or
    'startup'. Otherwise strips parenthesized text, drops everything after
    the first '|', removes non-alphanumeric characters (whitespace kept),
    and trims the result.
    """
    # Reject non-strings and empty input early.
    if not isinstance(name, str) or not name:
        return ""

    lowered = name.strip().lower()

    # Placeholder values that effectively mean "no company".
    placeholders = {
        "null", "none", "na", "n.a", "notfound", "error",
        "notavilable", "notavailable", ""
    }
    if lowered in placeholders:
        return ""

    # Any of these substrings anywhere disqualifies the whole name
    # (list deliberately includes common misspellings like 'sealth startup').
    blocked = (
        "freelance",
        "freelancer",
        "consulting",
        "not working",
        "taking break",
        "startup",
        "stealth startup",
        "sealth startup",
    )
    if any(term in lowered for term in blocked):
        return ""

    # Drop parenthesized segments, then everything after the first pipe.
    stripped = re.sub(r"\(.*?\)", "", name)
    stripped = stripped.split("|", 1)[0]

    # Keep only letters, digits and whitespace; original case is preserved.
    cleaned = re.sub(r"[^a-zA-Z0-9\s]", "", stripped)

    return cleaned.strip()