dhisana 0.0.1.dev100__tar.gz → 0.0.1.dev101__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/PKG-INFO +1 -1
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/setup.py +1 -1
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/enrich_lead_information.py +131 -87
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/research_lead.py +1 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/PKG-INFO +1 -1
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/README.md +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/pyproject.toml +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/setup.cfg +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/cli.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/datasets.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/models.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/predictions.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/schemas/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/schemas/common.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/schemas/sales.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/ui/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/ui/components.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/add_mapping.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/agent_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/apollo_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/assistant_tool_tag.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/built_with_api_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/cache_output_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/cache_output_tools_local.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_email_validity_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_for_intent_signal.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_linkedin_url_validity.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/clay_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/clean_properties.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/company_utils.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_salesnav_query.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_search_query.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_three_step_workflow.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/composite_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/dataframe_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/domain_parser.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/email_parse_helpers.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/extract_email_content_for_llm.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/field_validators.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/g2_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_content.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_email.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_email_response.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_flow.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_leads_salesnav.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_linkedin_connect_message.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_linkedin_response_message.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_structured_output_internal.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/google_custom_search.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/google_workspace_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/hubspot_clearbit.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/hubspot_crm_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/instantly_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/linkedin_crawler.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/lusha_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openai_assistant_and_file_utils.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openai_helpers.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/parse_linkedin_messages_txt.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/proxy_curl_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/python_function_to_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/sales_navigator_crawler.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/salesforce_crm_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/sendgrid_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/serpapi_search_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/test_connect.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/trasform_json.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/web_download_parse_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/workflow_code_model.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/zoominfo_tools.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/__init__.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/agent.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/flow.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/task.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/test.py +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/SOURCES.txt +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/dependency_links.txt +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/entry_points.txt +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/requires.txt +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/top_level.txt +0 -0
- {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/tests/test_agent_tools.py +0 -0
|
@@ -18,7 +18,10 @@ from dhisana.utils.field_validators import (
|
|
|
18
18
|
normalize_linkedin_url,
|
|
19
19
|
normalize_linkedin_company_url,
|
|
20
20
|
normalize_salesnav_url,
|
|
21
|
-
normalize_linkedin_company_salesnav_url
|
|
21
|
+
normalize_linkedin_company_salesnav_url,
|
|
22
|
+
validate_and_clean_email,
|
|
23
|
+
validation_organization_domain,
|
|
24
|
+
validate_website_url
|
|
22
25
|
)
|
|
23
26
|
from dhisana.utils.apollo_tools import enrich_user_info_with_apollo
|
|
24
27
|
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
@@ -36,11 +39,6 @@ from dhisana.utils.serpapi_search_tools import (
|
|
|
36
39
|
get_company_domain_from_google_search,
|
|
37
40
|
get_company_website_from_linkedin_url,
|
|
38
41
|
)
|
|
39
|
-
from dhisana.utils.field_validators import (
|
|
40
|
-
validate_and_clean_email,
|
|
41
|
-
validation_organization_domain,
|
|
42
|
-
validate_website_url
|
|
43
|
-
)
|
|
44
42
|
|
|
45
43
|
import logging
|
|
46
44
|
logging.basicConfig(level=logging.INFO)
|
|
@@ -48,23 +46,58 @@ logger = logging.getLogger(__name__)
|
|
|
48
46
|
|
|
49
47
|
|
|
50
48
|
# ----------------------------------------------------------------------
|
|
51
|
-
#
|
|
49
|
+
# Allowed Enrichment Tools
|
|
52
50
|
# ----------------------------------------------------------------------
|
|
53
51
|
ALLOWED_ENRICHMENT_TOOLS = ["proxycurl", "apollo", "zoominfo"]
|
|
54
52
|
|
|
55
|
-
# A map from tool name to the corresponding function that will enrich user info.
|
|
56
53
|
USER_LOOKUP_TOOL_NAME_TO_FUNCTION_MAP = {
|
|
57
54
|
"apollo": enrich_user_info_with_apollo,
|
|
58
55
|
"proxycurl": enrich_user_info_with_proxy_curl,
|
|
59
56
|
}
|
|
60
57
|
|
|
61
58
|
|
|
59
|
+
# ----------------------------------------------------------------------
|
|
60
|
+
# BasicLeadInformation model
|
|
61
|
+
# ----------------------------------------------------------------------
|
|
62
|
+
class BasicLeadInformation(BaseModel):
|
|
63
|
+
full_name: str = Field(..., description="Full name of the lead")
|
|
64
|
+
first_name: str = Field(..., description="First name of the lead")
|
|
65
|
+
last_name: str = Field(..., description="Last name of the lead")
|
|
66
|
+
email: str = Field(..., description="Email address of the lead")
|
|
67
|
+
primary_domain_of_organization: str = Field(..., description="Primary domain of the organization")
|
|
68
|
+
job_title: str = Field(..., description="Job Title of the lead")
|
|
69
|
+
phone: str = Field(..., description="Phone number of the lead")
|
|
70
|
+
headline: str = Field(..., description="Headline of the lead")
|
|
71
|
+
lead_location: str = Field(..., description="Location of the lead")
|
|
72
|
+
organization_name: str = Field(..., description="Current Company where lead works")
|
|
73
|
+
common_connections: int = Field(..., description="Number of common connections with the lead. Default 0")
|
|
74
|
+
followers_count: int = Field(..., description="Number of followers of the lead. Default 0")
|
|
75
|
+
tenure_in_current_role: str = Field(..., description="Tenure in the current role")
|
|
76
|
+
tenure_in_current_company: str = Field(..., description="Tenure in the current company")
|
|
77
|
+
connection_degree: str = Field(..., description="Degree of connection with the lead (1st, 2nd, 3rd)")
|
|
78
|
+
is_premium_account: bool = Field(..., description="Is the lead a premium account. Default is false.")
|
|
79
|
+
country_code: str = Field(..., description="Alpha-2 ISO3166 country code eg. US")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ----------------------------------------------------------------------
|
|
83
|
+
# Helper: chunkify
|
|
84
|
+
# ----------------------------------------------------------------------
|
|
85
|
+
def chunkify(items: List[Any], chunk_size: int) -> List[List[Any]]:
|
|
86
|
+
"""
|
|
87
|
+
Splits a list into sublists (chunks) of size `chunk_size`.
|
|
88
|
+
"""
|
|
89
|
+
for i in range(0, len(items), chunk_size):
|
|
90
|
+
yield items[i : i + chunk_size]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# ----------------------------------------------------------------------
|
|
94
|
+
# Function: cleanup_user_name
|
|
95
|
+
# ----------------------------------------------------------------------
|
|
62
96
|
def cleanup_user_name(cloned_properties: dict) -> dict:
|
|
63
97
|
"""
|
|
64
98
|
Cleans up user name fields: 'full_name', 'first_name', 'last_name'.
|
|
65
99
|
Returns the updated dictionary. If values are invalid or placeholders, sets them to ''.
|
|
66
100
|
"""
|
|
67
|
-
|
|
68
101
|
if not isinstance(cloned_properties, dict):
|
|
69
102
|
return {}
|
|
70
103
|
|
|
@@ -86,8 +119,9 @@ def cleanup_user_name(cloned_properties: dict) -> dict:
|
|
|
86
119
|
stripped = stripped.split("|", 1)[0]
|
|
87
120
|
# Remove extra non-alphanumeric characters (but allow whitespace)
|
|
88
121
|
stripped = re.sub(r"[^a-zA-Z0-9\s]", "", stripped)
|
|
89
|
-
|
|
90
|
-
|
|
122
|
+
|
|
123
|
+
# Capitalize the first letter of each word, and lowercase the rest
|
|
124
|
+
return " ".join(word.capitalize() for word in stripped.strip().split())
|
|
91
125
|
|
|
92
126
|
full_name = normalize(cloned_properties.get("full_name"))
|
|
93
127
|
first_name = normalize(cloned_properties.get("first_name"))
|
|
@@ -100,41 +134,14 @@ def cleanup_user_name(cloned_properties: dict) -> dict:
|
|
|
100
134
|
cloned_properties["full_name"] = full_name
|
|
101
135
|
cloned_properties["first_name"] = first_name
|
|
102
136
|
cloned_properties["last_name"] = last_name
|
|
137
|
+
|
|
103
138
|
return cloned_properties
|
|
104
139
|
|
|
105
140
|
|
|
106
|
-
class BasicLeadInformation(BaseModel):
|
|
107
|
-
full_name: str = Field(..., description="Full name of the lead")
|
|
108
|
-
first_name: str = Field(..., description="First name of the lead")
|
|
109
|
-
last_name: str = Field(..., description="Last name of the lead")
|
|
110
|
-
email: str = Field(..., description="Email address of the lead")
|
|
111
|
-
primary_domain_of_organization: str = Field(..., description="Primary domain of the organization")
|
|
112
|
-
job_title: str = Field(..., description="Job Title of the lead")
|
|
113
|
-
phone: str = Field(..., description="Phone number of the lead")
|
|
114
|
-
headline: str = Field(..., description="Headline of the lead")
|
|
115
|
-
lead_location: str = Field(..., description="Location of the lead")
|
|
116
|
-
organization_name: str = Field(..., description="Current Company where lead works")
|
|
117
|
-
common_connections: int = Field(..., description="Number of common connections with the lead. Default 0")
|
|
118
|
-
followers_count: int = Field(..., description="Number of followers of the lead. Default 0")
|
|
119
|
-
tenure_in_current_role: str = Field(..., description="Tenure in the current role")
|
|
120
|
-
tenure_in_current_company: str = Field(..., description="Tenure in the current company")
|
|
121
|
-
connection_degree: str = Field(..., description="Degree of connection with the lead (1st, 2nd, 3rd)")
|
|
122
|
-
is_premium_account: bool = Field(..., description="Is the lead a premium account. Default is false.")
|
|
123
|
-
country_code: str = Field(..., description="Alpha-2 ISO3166 country code eg. US")
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def chunkify(items: List[Any], chunk_size: int) -> List[List[Any]]:
|
|
127
|
-
"""
|
|
128
|
-
Splits a list into sublists (chunks) of size `chunk_size`.
|
|
129
|
-
"""
|
|
130
|
-
for i in range(0, len(items), chunk_size):
|
|
131
|
-
yield items[i : i + chunk_size]
|
|
132
|
-
|
|
133
|
-
|
|
134
141
|
# ----------------------------------------------------------------------
|
|
135
142
|
# LLM-based cleanup for single lead
|
|
136
143
|
# ----------------------------------------------------------------------
|
|
137
|
-
async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config) -> Dict[str, Any]:
|
|
144
|
+
async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config: Optional[dict]) -> Dict[str, Any]:
|
|
138
145
|
"""
|
|
139
146
|
Takes a JSON string representation of partial lead info,
|
|
140
147
|
returns a cleaned-up lead dictionary matching BasicLeadInformation fields.
|
|
@@ -163,21 +170,48 @@ async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config) -> Dict[
|
|
|
163
170
|
return lead_info.model_dump()
|
|
164
171
|
|
|
165
172
|
|
|
173
|
+
# ----------------------------------------------------------------------
|
|
174
|
+
# Helper: is_personal_email_domain
|
|
175
|
+
# ----------------------------------------------------------------------
|
|
176
|
+
def is_personal_email_domain(domain: str) -> bool:
|
|
177
|
+
"""
|
|
178
|
+
Very simple check to see if the domain is one of the common free/personal
|
|
179
|
+
email providers. Could expand this list or integrate a third-party API
|
|
180
|
+
for more accuracy.
|
|
181
|
+
"""
|
|
182
|
+
common_free_domains = {
|
|
183
|
+
"gmail.com", "yahoo.com", "hotmail.com", "outlook.com",
|
|
184
|
+
"protonmail.com", "icloud.com", "aol.com", "mail.com",
|
|
185
|
+
"pm.me", "yandex.com", "gmx.com"
|
|
186
|
+
}
|
|
187
|
+
domain = domain.strip().lower()
|
|
188
|
+
return (domain in common_free_domains) or domain.endswith(".edu")
|
|
189
|
+
|
|
190
|
+
|
|
166
191
|
# ----------------------------------------------------------------------
|
|
167
192
|
# Main validation & cleanup function
|
|
168
193
|
# ----------------------------------------------------------------------
|
|
169
|
-
async def validate_and_cleanup(
|
|
194
|
+
async def validate_and_cleanup(
|
|
195
|
+
cloned_properties: dict,
|
|
196
|
+
tool_config: Optional[dict] = None,
|
|
197
|
+
use_strict_check: bool = False
|
|
198
|
+
) -> dict:
|
|
170
199
|
"""
|
|
171
200
|
Wrapper to validate & normalize various properties in a dictionary.
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
201
|
+
|
|
202
|
+
1) Clean up/validate typical fields.
|
|
203
|
+
2) If name fields appear invalid, fallback to LLM-based name inference.
|
|
204
|
+
3) If 'primary_domain_of_organization' AND 'organization_website' are both empty,
|
|
205
|
+
but there's a valid corporate email, use that as the domain.
|
|
206
|
+
4) (Optional) Enrich the organization info from the name if needed.
|
|
175
207
|
"""
|
|
176
208
|
|
|
177
209
|
if not isinstance(cloned_properties, dict):
|
|
178
210
|
return {}
|
|
179
211
|
|
|
180
|
-
#
|
|
212
|
+
# ------------------------------------------------------------------
|
|
213
|
+
# Step 1: Normalize typical fields
|
|
214
|
+
# ------------------------------------------------------------------
|
|
181
215
|
cloned_properties["user_linkedin_url"] = normalize_linkedin_url(
|
|
182
216
|
cloned_properties.get("user_linkedin_url")
|
|
183
217
|
)
|
|
@@ -203,19 +237,19 @@ async def validate_and_cleanup(cloned_properties: dict, tool_config: Optional[di
|
|
|
203
237
|
cloned_properties.get("organization_name")
|
|
204
238
|
)
|
|
205
239
|
|
|
206
|
-
#
|
|
240
|
+
# ------------------------------------------------------------------
|
|
241
|
+
# Step 2: Basic name-check. If invalid => LLM fallback.
|
|
242
|
+
# ------------------------------------------------------------------
|
|
207
243
|
def has_special_characters(val: str) -> bool:
|
|
208
244
|
return bool(re.search(r"[^a-zA-Z0-9\s]", val))
|
|
209
245
|
|
|
210
246
|
def is_invalid_name(val: str) -> bool:
|
|
211
|
-
|
|
212
|
-
return len(val.strip()) < 3 or has_special_characters(val)
|
|
247
|
+
return (len(val.strip()) < 3) or has_special_characters(val)
|
|
213
248
|
|
|
214
249
|
full_name = cloned_properties.get("full_name", "")
|
|
215
250
|
first_name = cloned_properties.get("first_name", "")
|
|
216
251
|
last_name = cloned_properties.get("last_name", "")
|
|
217
252
|
|
|
218
|
-
# If ANY are invalid => Use get_clean_lead_info_with_llm
|
|
219
253
|
if (
|
|
220
254
|
is_invalid_name(full_name)
|
|
221
255
|
or is_invalid_name(first_name)
|
|
@@ -223,19 +257,44 @@ async def validate_and_cleanup(cloned_properties: dict, tool_config: Optional[di
|
|
|
223
257
|
):
|
|
224
258
|
lead_info_str = str(cloned_properties)
|
|
225
259
|
logger.info(
|
|
226
|
-
"Detected invalid name fields
|
|
260
|
+
"Detected invalid name fields. Using LLM to infer/correct name fields."
|
|
227
261
|
)
|
|
228
262
|
# Attempt LLM-based cleanup
|
|
229
263
|
new_lead_info = await get_clean_lead_info_with_llm(lead_info_str, tool_config=tool_config)
|
|
230
264
|
if new_lead_info:
|
|
231
|
-
# Just replace relevant name fields if returned from LLM
|
|
232
265
|
cloned_properties["full_name"] = new_lead_info.get("full_name", "")
|
|
233
266
|
cloned_properties["first_name"] = new_lead_info.get("first_name", "")
|
|
234
267
|
cloned_properties["last_name"] = new_lead_info.get("last_name", "")
|
|
235
268
|
else:
|
|
236
|
-
# Use cheaper logic
|
|
269
|
+
# Use the cheaper logic
|
|
237
270
|
cloned_properties = cleanup_user_name(cloned_properties)
|
|
238
271
|
|
|
272
|
+
# ------------------------------------------------------------------
|
|
273
|
+
# Step 3: If domain & website are empty but there's a corporate email
|
|
274
|
+
# ------------------------------------------------------------------
|
|
275
|
+
# - If email is present, check if domain is personal or corporate
|
|
276
|
+
# - If corporate, set primary_domain_of_organization from email domain
|
|
277
|
+
# ------------------------------------------------------------------
|
|
278
|
+
domain_empty = not cloned_properties.get("primary_domain_of_organization")
|
|
279
|
+
website_empty = not cloned_properties.get("organization_website")
|
|
280
|
+
email = cloned_properties.get("email", "")
|
|
281
|
+
|
|
282
|
+
if domain_empty and website_empty and email:
|
|
283
|
+
# parse domain from email
|
|
284
|
+
extracted_domain = email.split("@")[-1].strip().lower()
|
|
285
|
+
if extracted_domain and (not is_personal_email_domain(extracted_domain)):
|
|
286
|
+
# This is a "corporate" email domain, so use it
|
|
287
|
+
cloned_properties["primary_domain_of_organization"] = extracted_domain
|
|
288
|
+
cloned_properties["organization_website"] = f"https://www.{extracted_domain}"
|
|
289
|
+
logger.info("Set primary_domain_of_organization from corporate email domain.")
|
|
290
|
+
|
|
291
|
+
if domain_empty and not website_empty:
|
|
292
|
+
from urllib.parse import urlparse
|
|
293
|
+
parsed_website = urlparse(cloned_properties["organization_website"])
|
|
294
|
+
possible_domain = parsed_website.netloc.replace("www.", "")
|
|
295
|
+
if possible_domain:
|
|
296
|
+
cloned_properties["primary_domain_of_organization"] = possible_domain
|
|
297
|
+
logger.info("Set primary_domain_of_organization from organization_website domain.")
|
|
239
298
|
return cloned_properties
|
|
240
299
|
|
|
241
300
|
@assistant_tool
|
|
@@ -266,7 +325,7 @@ async def enrich_lead_information(
|
|
|
266
325
|
|
|
267
326
|
cloned_properties = await enrich_with_provider(cloned_properties, tool_config)
|
|
268
327
|
|
|
269
|
-
await
|
|
328
|
+
await enrich_organization_info_from_name(
|
|
270
329
|
row=cloned_properties,
|
|
271
330
|
use_strict_check=use_strict_check,
|
|
272
331
|
tool_config=tool_config,
|
|
@@ -473,7 +532,6 @@ async def enrich_organization_info_from_name(
|
|
|
473
532
|
"""
|
|
474
533
|
org_name_key = "organization_name"
|
|
475
534
|
org_domain_key = "primary_domain_of_organization"
|
|
476
|
-
linkedin_url_key = "organization_linkedin_url"
|
|
477
535
|
website_key = "organization_website"
|
|
478
536
|
|
|
479
537
|
org_name = (row.get(org_name_key) or "").strip()
|
|
@@ -489,36 +547,7 @@ async def enrich_organization_info_from_name(
|
|
|
489
547
|
# If domain or website is already present, we consider it enriched
|
|
490
548
|
if row.get(org_domain_key) or row.get(website_key):
|
|
491
549
|
return
|
|
492
|
-
|
|
493
|
-
# Try to fetch additional company info (domain, website, maybe LinkedIn)
|
|
494
|
-
company_info = await get_company_domain_from_llm_web_search(
|
|
495
|
-
company_name=org_name,
|
|
496
|
-
lead_info=row,
|
|
497
|
-
location="US",
|
|
498
|
-
tool_config=tool_config
|
|
499
|
-
)
|
|
500
|
-
|
|
501
|
-
if company_info and isinstance(company_info, dict):
|
|
502
|
-
# If the LLM found a domain, set it
|
|
503
|
-
if company_info.get("primary_domain_of_organization"):
|
|
504
|
-
row[org_domain_key] = company_info["primary_domain_of_organization"]
|
|
505
|
-
|
|
506
|
-
# If the LLM found an organization website, set it
|
|
507
|
-
if company_info.get("organization_website"):
|
|
508
|
-
row[website_key] = company_info["organization_website"]
|
|
509
|
-
|
|
510
|
-
# If there's a LinkedIn URL from LLM, set it
|
|
511
|
-
if company_info.get("organization_linkedin_url") and not row[linkedin_url_key]:
|
|
512
|
-
row[linkedin_url_key] = company_info["organization_linkedin_url"]
|
|
513
|
-
|
|
514
|
-
if company_info.get("organization_name") and not row[org_name_key]:
|
|
515
|
-
row[org_name_key] = company_info["organization_name"]
|
|
516
|
-
|
|
517
|
-
# After setting them, run set_organization_domain to finalize or clean up domain
|
|
518
|
-
await set_organization_domain(row, use_strict_check, tool_config)
|
|
519
|
-
else:
|
|
520
|
-
# If LLM didn't return anything, at least set the domain to empty
|
|
521
|
-
row[org_domain_key] = ""
|
|
550
|
+
await set_organization_domain(row, use_strict_check, tool_config)
|
|
522
551
|
|
|
523
552
|
|
|
524
553
|
async def set_organization_domain(
|
|
@@ -567,8 +596,21 @@ async def set_organization_domain(
|
|
|
567
596
|
location="US",
|
|
568
597
|
tool_config=tool_config
|
|
569
598
|
)
|
|
570
|
-
|
|
571
|
-
|
|
599
|
+
if company_info and isinstance(company_info, dict):
|
|
600
|
+
# If the LLM found a domain, set it
|
|
601
|
+
if company_info.get("primary_domain_of_organization") and not row[org_domain_key]:
|
|
602
|
+
row[org_domain_key] = company_info["primary_domain_of_organization"]
|
|
603
|
+
|
|
604
|
+
# If the LLM found an organization website, set it
|
|
605
|
+
if company_info.get("organization_website") and not row[website_key]:
|
|
606
|
+
row[website_key] = company_info["organization_website"]
|
|
607
|
+
|
|
608
|
+
# If there's a LinkedIn URL from LLM, set it
|
|
609
|
+
if company_info.get("organization_linkedin_url") and not row[linkedin_url_key]:
|
|
610
|
+
row[linkedin_url_key] = company_info["organization_linkedin_url"]
|
|
611
|
+
|
|
612
|
+
if company_info.get("organization_name") and not row[org_name_key]:
|
|
613
|
+
row[org_name_key] = company_info["organization_name"]
|
|
572
614
|
|
|
573
615
|
row[org_domain_key] = extracted_domain or ""
|
|
574
616
|
logger.debug("Final domain selected: %s", row[org_domain_key])
|
|
@@ -726,7 +768,9 @@ async def get_company_domain_from_llm_web_search(
|
|
|
726
768
|
{lead_info}
|
|
727
769
|
|
|
728
770
|
Search and gather any domain/website info or LinkedIn details.
|
|
729
|
-
|
|
771
|
+
DO NOT make up information about company.
|
|
772
|
+
Find based on the domain in the leads email if its a corporate email, company name if sepcified to find the company name, website and domain.
|
|
773
|
+
|
|
730
774
|
**Output**:
|
|
731
775
|
Return your final output as valid JSON with the following structure:
|
|
732
776
|
{{
|
|
@@ -5,6 +5,7 @@ from dhisana.utils.clean_properties import cleanup_email_context
|
|
|
5
5
|
from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
|
|
6
6
|
|
|
7
7
|
def clean_nul_bytes(s: str) -> str:
|
|
8
|
+
s = s.replace('```markdown', '')
|
|
8
9
|
return s.replace('\x00', '')
|
|
9
10
|
|
|
10
11
|
class LeadResearchInformation(BaseModel):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_email_validity_tools.py
RENAMED
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_linkedin_url_validity.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_three_step_workflow.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/extract_email_content_for_llm.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_linkedin_connect_message.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openai_assistant_and_file_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/openapi_tool.py
RENAMED
|
File without changes
|
{dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/parse_linkedin_messages_txt.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|