dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +33 -0
- dhisana/schemas/sales.py +224 -23
- dhisana/utils/add_mapping.py +72 -63
- dhisana/utils/apollo_tools.py +739 -109
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/cache_output_tools.py +23 -23
- dhisana/utils/check_email_validity_tools.py +456 -458
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +3 -1
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +585 -85
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +69 -16
- dhisana/utils/generate_email_response.py +298 -41
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +19 -6
- dhisana/utils/generate_linkedin_response_message.py +156 -65
- dhisana/utils/generate_structured_output_internal.py +351 -131
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +391 -25
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +771 -167
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +19 -16
- dhisana/utils/parse_linkedin_messages_txt.py +2 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +507 -206
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +121 -68
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +363 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +576 -0
- dhisana/utils/test_connect.py +1765 -92
- dhisana/utils/trasform_json.py +95 -16
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxycurl_search_leads.py
ADDED
@@ -0,0 +1,426 @@
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+import aiohttp
+from pydantic import BaseModel
+
+from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+from dhisana.utils.proxy_curl_tools import (
+    get_proxycurl_access_token,
+    fill_in_missing_properties,
+    transform_company_data,
+)
+from dhisana.utils.cache_output_tools import cache_output
+from urllib.parse import urlparse, urlunparse
+from dhisana.utils.clean_properties import cleanup_properties
+from dhisana.utils.assistant_tool_tag import assistant_tool
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+# ────────────────────────────
+# 🛠 Small generic helpers
+# ────────────────────────────
+def _remove_empty_values(d: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a copy of *d* without keys whose value is empty, None, or zero for integers."""
+    cleaned = {}
+    for k, v in d.items():
+        # Skip None values
+        if v is None:
+            continue
+        # Skip empty strings or whitespace-only strings
+        elif isinstance(v, str) and v.strip() == "":
+            continue
+        # Skip empty lists/arrays
+        elif isinstance(v, list) and len(v) == 0:
+            continue
+        # Skip zero values for integer fields (assuming they're not meaningful for search)
+        elif isinstance(v, int) and v == 0:
+            continue
+        # Keep all other values
+        else:
+            cleaned[k] = v
+    return cleaned
+
+
+def _build_common_params(
+    search_params: BaseModel,
+    max_entries: int,
+    enrich_profiles: bool,
+) -> Dict[str, Any]:
+    """Convert a Pydantic model into Proxycurl query params, removing empty/None values."""
+    params = search_params.model_dump(exclude_none=True)
+    params = _remove_empty_values(params)
+
+    params["page_size"] = max_entries if max_entries > 0 else 5
+    params["enrich_profiles"] = "enrich" if enrich_profiles else "skip"
+    params["use_cache"] = "if-present"
+    return params
+
+
+# ────────────────────────────
+# 📄 Search parameter schemas
+# ────────────────────────────
+class PeopleSearchParams(BaseModel):
+    current_role_title: Optional[str] = None
+    current_company_industry: Optional[str] = None
+    current_company_employee_count_min: Optional[int] = None
+    current_company_employee_count_max: Optional[int] = None
+    country: Optional[str] = None
+    region: Optional[str] = None
+    city: Optional[str] = None
+    summary: Optional[str] = None
+    current_job_description: Optional[str] = None
+    past_job_description: Optional[str] = None
+
+
+class CompanySearchParams(BaseModel):
+    country: Optional[str] = None
+    region: Optional[str] = None
+    city: Optional[str] = None
+    type: Optional[str] = None
+    follower_count_min: Optional[int] = None
+    follower_count_max: Optional[int] = None
+    name: Optional[str] = None
+    industry: Optional[str] = None
+    employee_count_max: Optional[int] = None
+    employee_count_min: Optional[int] = None
+    description: Optional[str] = None
+    founded_after_year: Optional[int] = None
+    founded_before_year: Optional[int] = None
+    funding_amount_max: Optional[int] = None
+    funding_amount_min: Optional[int] = None
+    funding_raised_after: Optional[str] = None
+    funding_raised_before: Optional[str] = None
+    public_identifier_in_list: Optional[str] = None
+    public_identifier_not_in_list: Optional[str] = None
+
+
+class JobSearchParams(BaseModel):
+    job_type: Optional[str] = None
+    experience_level: Optional[str] = None
+    when: Optional[str] = None
+    flexibility: Optional[str] = None
+    geo_id: Optional[int] = None
+    keyword: Optional[str] = None
+    search_id: Optional[str] = None
+
+
+# ────────────────────────────
+# 👤 People search
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_people_search_leads(
+    search_params: PeopleSearchParams,
+    max_entries: int = 5,
+    enrich_profiles: bool = False,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """Search for leads on Proxycurl based on a plain‑English ICP description."""
+
+    params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/search/person"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl search error %s", resp.status)
+                    return []
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl search: %s", exc)
+        return []
+
+    leads: List[Dict[str, Any]] = []
+    for item in (data.get("results") or [])[:max_entries]:
+        lead: Dict[str, Any] = {
+            "user_linkedin_url": item.get("linkedin_profile_url"),
+        }
+        profile = item.get("profile") or {}
+        if profile:
+            # Fill lead fields using profile data
+            lead = fill_in_missing_properties(lead, profile)
+            first_exp = (profile.get("experiences") or [{}])[0]
+            lead.setdefault("organization_name", first_exp.get("company", ""))
+            lead.setdefault(
+                "organization_linkedin_url",
+                first_exp.get("company_linkedin_profile_url", ""),
+            )
+
+            additional_props = lead.get("additional_properties") or {}
+            additional_props["pc_person_data"] = json.dumps(
+                cleanup_properties(profile)
+            )
+            lead["additional_properties"] = additional_props
+
+            linkedin_url = lead.get("user_linkedin_url")
+            if linkedin_url:
+                cache_output(
+                    "enrich_person_info_from_proxycurl", linkedin_url, profile
+                )
+
+        if cleaned := cleanup_properties(lead):
+            leads.append(cleaned)
+
+    return leads
+
+
+# ────────────────────────────
+# 🏢 Company search
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_company_search_leads(
+    search_params: CompanySearchParams,
+    max_entries: int = 5,
+    enrich_profiles: bool = False,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """Search for companies on Proxycurl based on given parameters."""
+
+    params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/search/company"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl company search error %s", resp.status)
+                    return []
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl company search: %s", exc)
+        return []
+
+    companies: List[Dict[str, Any]] = []
+    for item in (data.get("results") or [])[:max_entries]:
+        company: Dict[str, Any] = {
+            "organization_linkedin_url": item.get("linkedin_profile_url"),
+        }
+        profile = item.get("profile") or {}
+        if profile:
+            # Copy mapped properties from the enriched profile
+            transformed = transform_company_data(profile)
+            company.update(transformed)
+
+            # Store the raw profile JSON for reference
+            additional_props = company.get("additional_properties") or {}
+            additional_props["pc_company_data"] = json.dumps(
+                cleanup_properties(profile)
+            )
+            company["additional_properties"] = additional_props
+
+            linkedin_url = company.get("organization_linkedin_url") or ""
+            if linkedin_url and "linkedin.com/company" in linkedin_url:
+                parsed_url = urlparse(linkedin_url)
+                if parsed_url.netloc != "www.linkedin.com":
+                    standardized_netloc = "www.linkedin.com"
+                    standardized_path = parsed_url.path
+                    if not standardized_path.startswith("/company/"):
+                        standardized_path = "/company" + standardized_path
+                    standardized_url = urlunparse(
+                        parsed_url._replace(
+                            netloc=standardized_netloc,
+                            path=standardized_path,
+                        )
+                    )
+                else:
+                    standardized_url = linkedin_url
+                if standardized_url and not standardized_url.endswith("/"):
+                    standardized_url += "/"
+                cache_output(
+                    "enrich_organization_info_from_proxycurl",
+                    standardized_url,
+                    transformed,
+                )
+
+        if cleaned := cleanup_properties(company):
+            companies.append(cleaned)
+
+    return companies
+
+
+# ────────────────────────────
+# 💼 Job search
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_job_search(
+    search_params: JobSearchParams,
+    max_entries: int = 5,
+    enrich_profiles: bool = False,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """List jobs posted by a company using Proxycurl's job search API."""
+
+    # Job search endpoint does not support enrich_profiles
+    params = _build_common_params(search_params, max_entries, enrich_profiles=enrich_profiles)
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/company/job"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl job search error %s", resp.status)
+                    return []
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl job search: %s", exc)
+        return []
+
+    job_entries: List[Dict[str, Any]] = []
+    for item in (data.get("job") or data.get("jobs") or [])[:max_entries]:
+        job: Dict[str, Any] = {
+            "organization_name": item.get("company"),
+            "organization_linkedin_url": item.get("company_url"),
+            "job_title": item.get("job_title"),
+            "job_posting_url": item.get("job_url"),
+            "list_date": item.get("list_date"),
+            "location": item.get("location"),
+        }
+        additional_props = job.get("additional_properties") or {}
+        additional_props["pc_job_data"] = json.dumps(item)
+        job["additional_properties"] = additional_props
+
+        job_url = job.get("job_posting_url")
+        if job_url:
+            cache_output("enrich_job_info_from_proxycurl", job_url, item)
+        if cleaned := cleanup_properties(job):
+            job_entries.append(cleaned)
+
+    return job_entries
+
+
+# ────────────────────────────
+# 📊 Job count
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_job_count(
+    search_params: JobSearchParams,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """Get the count of jobs posted by a company using Proxycurl's job count API."""
+
+    # Job count endpoint does not support enrich_profiles or max_entries
+    params = search_params.model_dump(exclude_none=True)
+    params = _remove_empty_values(params)
+
+    # Job count endpoint doesn't need page_size or enrich_profiles
+    if "page_size" in params:
+        del params["page_size"]
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return {"count": 0}
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/company/job/count"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl job count error %s", resp.status)
+                    return {"count": 0}
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl job count: %s", exc)
+        return {"count": 0}
+
+    return {"count": data.get("count", 0)}
+
+
+# ────────────────────────────
+# 🔍 Company Profile - Get Search ID
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_get_company_search_id(
+    company_url: str,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """Get a company's search ID using Proxycurl's Company Profile endpoint.
+
+    The search_id is required for other Proxycurl endpoints like job search and job count.
+
+    Args:
+        company_url: LinkedIn company profile URL (e.g., "https://www.linkedin.com/company/microsoft/")
+        tool_config: Optional tool configuration containing API key
+
+    Returns:
+        Dictionary containing search_id and basic company info, or error info if failed
+    """
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return {"error": str(e), "search_id": None}
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/company"
+
+    params = {
+        "url": company_url,
+        "use_cache": "if-present",
+        "fallback_to_cache": "on-error"
+    }
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl company profile error %s", resp.status)
+                    return {"error": f"HTTP {resp.status}", "search_id": None}
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl company profile lookup: %s", exc)
+        return {"error": str(exc), "search_id": None}
+
+    # Extract the key information
+    search_id = data.get("search_id")
+    name = data.get("name")
+    linkedin_internal_id = data.get("linkedin_internal_id")
+    industry = data.get("industry")
+
+    result = {
+        "search_id": search_id,
+        "name": name,
+        "linkedin_internal_id": linkedin_internal_id,
+        "industry": industry,
+        "company_url": company_url
+    }
+
+    if search_id:
+        logger.info(f"Successfully retrieved search_id '{search_id}' for company '{name}'")
+    else:
+        logger.warning(f"No search_id found for company at {company_url}")
+        result["error"] = "No search_id found in response"
+
+    return result
dhisana/utils/research_lead.py
CHANGED
@@ -1,86 +1,50 @@
-
-# Write up a research summary about the lead using AI.
-# Use the provided user information, ICP to summarize the research
-
 from typing import Dict, List, Optional
 from pydantic import BaseModel
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.clean_properties import cleanup_email_context
-from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
 
-
-
-
-    icp_match_score: int
+def clean_nul_bytes(s: str) -> str:
+    s = s.replace('```markdown', '')
+    return s.replace('\x00', '')
 
-
-async def research_lead_with_icp_ai(user_properties: dict, icp: str, instructions:str, tool_config: Optional[List[Dict]] = None):
+def _remove_excluded_fields(data: Dict) -> Dict:
     """
-
-
-    This function sends an asynchronous request to gather research information about the lead and evaluate how well it matches the Ideal Customer Profile (ICP).
-
-    Parameters:
-    user_properties (dict): Information about the lead.
-    icp (str): The Ideal Customer Profile description.
-    instructions (str): Additional instructions for generating the research summary.
-    tool_config (Optional[dict]): Configuration for the tool (default is None).
-
-    Returns:
-    dict: The JSON response containing the research summary and ICP match score.
-
-    Raises:
-    ValueError: If required parameters are missing.
-    Exception: If there is an error in processing the request.
+    Return a copy of `data` that excludes keys named 'id'
+    or that end in '_by', '_id', '_to', or '_at'.
     """
-
-
-
-
-
-
-
-
-
-
-
-    {icp}
-
-    Custom insturctions for research
-    {instructions}
-
-    The output should be in JSON format with the following structure:
-    {{
-        "research_summary": "Short Summary of the research about lead. Include key insights and findings on how it matches the ICP.This value is neatly formmated Github Markdown.",
-        "icp_match_score": "Score of how well the lead matches the ICP (0-5). 0 no match, 5 perfect match."
-    }}
-    """
-    response, status = await get_structured_output_internal(instructions, LeadResearchInformation, tool_config=tool_config)
-    return response.model_dump()
-
+    excluded_keys = {"id"}
+    excluded_endings = ["_by", "_id", "_to", "_at", "_status", "research_summary"]
+
+    cleaned = {}
+    for k, v in data.items():
+        if k in excluded_keys:
+            continue
+        if any(k.endswith(suffix) for suffix in excluded_endings):
+            continue
+        cleaned[k] = v
+    return cleaned
 
 class LeadResearchInformation(BaseModel):
     research_summary: str
 
-
 @assistant_tool
-async def research_lead_with_full_info_ai(
-
-
+async def research_lead_with_full_info_ai(
+    user_properties: dict,
+    instructions: str,
+    tool_config: Optional[List[Dict]] = None
+):
     """
     Research on lead provided given input. Provide Detailed Summary.
-
-
-
-    tool_config (Optional[dict]): Configuration for the tool (default is None).
+    """
+    # Clean user properties (e.g. remove newlines, sanitize strings, etc.)
+    user_properties = cleanup_email_context(user_properties)
 
-
-
+    # Remove excluded fields from user_properties
+    user_properties = _remove_excluded_fields(user_properties)
 
-
-
-    Exception: If there is an error in processing the request.
-    """
+    # Optionally remove any known keys that should not appear (e.g. 'date_extracted')
+    user_properties.pop("date_extracted", None)
 
     instructions = f"""
     Please read the following user information and instructions, then produce a detailed summary of the lead in the specified format.
@@ -105,6 +69,12 @@ async def research_lead_with_full_info_ai(user_properties: dict, instructions:st
     8. Connections
     9. Current Company Information
     10. Contact Information
+    11. Addtional Info:
+        a. Include any githbub information like handle, repositories owned etc if present.
+        b. Include any twitter information like handle, followers etc if present.
+        c. Includ any youtube channel information like handle, subscribers etc if present.
+        d. Include any other social media information like handle, followers etc if present.
+
 
     - In the **About** section, create a clear, concise description of the lead that can be used for sales prospecting.
    - In the **Current Company Information** section, summarize what the lead’s current company does.
@@ -117,7 +87,90 @@ async def research_lead_with_full_info_ai(user_properties: dict, instructions:st
     {{
         "research_summary": "Detailed summary about lead. The summary should be neatly formatted in GitHub-Flavored Markdown, and include all the key information from the listed sections."
     }}
+    """
+    response, status = await get_structured_output_internal(
+        instructions,
+        LeadResearchInformation,
+        model="gpt-5.1-chat",
+        tool_config=tool_config
+    )
+    if status == "SUCCESS":
+        response.research_summary = clean_nul_bytes(response.research_summary)
+        return response.model_dump()
+    else:
+        return {"research_summary": ""}
+
+# --------------------------------------------
+# COMPANY-RELATED MODELS & FUNCTION (FIXED)
+# --------------------------------------------
+class CompanyResearchInformation(BaseModel):
+    research_summary: str
+
+@assistant_tool
+async def research_company_with_full_info_ai(
+    company_properties: dict,
+    instructions: str,
+    tool_config: Optional[List[Dict]] = None
+):
+    """
+    Research on company provided given input. Provide a Detailed Summary.
+
+    Parameters:
+    company_properties (dict): Information about the company.
+    instructions (str): Additional instructions for generating the detailed summary.
+    tool_config (Optional[List[Dict]]): Configuration for the tool (default is None).
+
+    Returns:
+    dict: The JSON response containing the detailed research summary of the company.
+    """
+    # Clean company properties (e.g. remove newlines, sanitize strings, etc.)
+    company_properties = cleanup_email_context(company_properties)
+
+    # Remove excluded fields from company_properties
+    company_properties = _remove_excluded_fields(company_properties)
+
+    instructions = f"""
+    Please read the following company information and instructions, then produce a detailed summary of the company in the specified format.
+    ---
+    Company Data include name, domain and website:
+    {company_properties}
 
+    Instructions:
+    {instructions}
+    ---
+
+    **Task**:
+    Give a short summary of the company based on the provided data. Include **firmographic details** if they are present.
+    The summary should have the following sections (only include them if there is relevant data):
+
+    1. About Company
+    2. Industry
+    3. Location / HQ
+    4. Employee Headcount
+    5. Revenue
+    6. Funding Information
+    7. Additional Firmographics (e.g. markets, expansions, or any other relevant data)
+
+    - In the **About Company** section, create a clear, concise description of what the company does (suitable for sales prospecting).
+    - Do not include any IDs, userIds, or GUIDs in the output.
+    - Have the above section headers even if section content is empty.
+    Use web search to find additional information about the company using company name and domain. Search what it does, news, and funding.
+
+    **Output**:
+    Return your final output as valid JSON with the following structure:
+    {{
+        "research_summary": "Detailed summary about the company. The summary should be neatly formatted in GitHub-Flavored Markdown, and include all the key information from the listed sections."
+    }}
     """
-    response, status = await get_structured_output_internal(
-
+    response, status = await get_structured_output_internal(
+        instructions,
+        CompanyResearchInformation,
+        model="gpt-5.1-chat",
+        use_web_search=False,
+        tool_config=tool_config
+    )
+    if status == "SUCCESS":
+        response.research_summary = clean_nul_bytes(response.research_summary)
+        return response.model_dump()
+    else:
+        return {"research_summary": ""}
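
As with the search module, here is a minimal sketch of how the reworked lead-research helper might be driven. Only the function signature, the `_remove_excluded_fields` behaviour, and the empty-summary fallback come from the diff; the lead fields shown and the choice of `tool_config=None` are illustrative assumptions.

```python
# Sketch only: the lead fields below are hypothetical examples; keys ending in
# "_by"/"_id" are included to illustrate what _remove_excluded_fields strips out.
import asyncio

from dhisana.utils.research_lead import research_lead_with_full_info_ai

async def main() -> None:
    lead = {
        "full_name": "Jane Doe",
        "job_title": "VP of Sales",
        "organization_name": "Acme Corp",
        "created_by": "importer",   # dropped: ends with "_by"
        "hubspot_id": "12345",      # dropped: ends with "_id"
    }
    result = await research_lead_with_full_info_ai(
        user_properties=lead,
        instructions="Highlight signals relevant to a data-security product.",
        tool_config=None,  # or a list of provider configs, as elsewhere in the package
    )
    # On failure the helper returns {"research_summary": ""} rather than raising.
    print(result["research_summary"])

if __name__ == "__main__":
    asyncio.run(main())
```
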
dhisana/utils/sales_navigator_crawler.py
CHANGED
@@ -3,21 +3,16 @@
 # Executes the tasks and sends the results back to the service.
 
 import asyncio
-from datetime import datetime
+from datetime import datetime
 import json
 import os
 import logging
 import re
 from typing import List, Dict, Any
 import html2text
-from pydantic import BaseModel, Field
 from playwright.async_api import async_playwright, Page
-import pandas as pd
 import requests # or aiohttp if you prefer async calls
 
-from dhisana.utils.assistant_tool_tag import assistant_tool
-from dhisana.utils.dataframe_tools import get_structured_output
-from dhisana.utils.web_download_parse_tools import parse_html_content_as_text
 import asyncio
 import logging
 import pyperclip