dhisana 0.0.1.dev100__tar.gz → 0.0.1.dev101__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/PKG-INFO +1 -1
  2. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/setup.py +1 -1
  3. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/enrich_lead_information.py +131 -87
  4. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/research_lead.py +1 -0
  5. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/PKG-INFO +1 -1
  6. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/README.md +0 -0
  7. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/pyproject.toml +0 -0
  8. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/setup.cfg +0 -0
  9. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/__init__.py +0 -0
  10. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/__init__.py +0 -0
  11. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/cli.py +0 -0
  12. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/datasets.py +0 -0
  13. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/models.py +0 -0
  14. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/cli/predictions.py +0 -0
  15. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/schemas/__init__.py +0 -0
  16. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/schemas/common.py +0 -0
  17. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/schemas/sales.py +0 -0
  18. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/ui/__init__.py +0 -0
  19. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/ui/components.py +0 -0
  20. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/__init__.py +0 -0
  21. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/add_mapping.py +0 -0
  22. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/agent_tools.py +0 -0
  23. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/apollo_tools.py +0 -0
  24. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/assistant_tool_tag.py +0 -0
  25. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/built_with_api_tools.py +0 -0
  26. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/cache_output_tools.py +0 -0
  27. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/cache_output_tools_local.py +0 -0
  28. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_email_validity_tools.py +0 -0
  29. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_for_intent_signal.py +0 -0
  30. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/check_linkedin_url_validity.py +0 -0
  31. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/clay_tools.py +0 -0
  32. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/clean_properties.py +0 -0
  33. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/company_utils.py +0 -0
  34. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_salesnav_query.py +0 -0
  35. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_search_query.py +0 -0
  36. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/compose_three_step_workflow.py +0 -0
  37. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/composite_tools.py +0 -0
  38. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/dataframe_tools.py +0 -0
  39. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/domain_parser.py +0 -0
  40. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/email_parse_helpers.py +0 -0
  41. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/extract_email_content_for_llm.py +0 -0
  42. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/field_validators.py +0 -0
  43. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/g2_tools.py +0 -0
  44. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_content.py +0 -0
  45. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_email.py +0 -0
  46. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_email_response.py +0 -0
  47. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_flow.py +0 -0
  48. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_leads_salesnav.py +0 -0
  49. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_linkedin_connect_message.py +0 -0
  50. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_linkedin_response_message.py +0 -0
  51. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/generate_structured_output_internal.py +0 -0
  52. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/google_custom_search.py +0 -0
  53. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/google_workspace_tools.py +0 -0
  54. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/hubspot_clearbit.py +0 -0
  55. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/hubspot_crm_tools.py +0 -0
  56. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/instantly_tools.py +0 -0
  57. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/linkedin_crawler.py +0 -0
  58. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/lusha_tools.py +0 -0
  59. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openai_assistant_and_file_utils.py +0 -0
  60. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openai_helpers.py +0 -0
  61. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_spec_to_tools.py +0 -0
  62. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
  63. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
  64. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +0 -0
  65. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
  66. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/parse_linkedin_messages_txt.py +0 -0
  67. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/proxy_curl_tools.py +0 -0
  68. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/python_function_to_tools.py +0 -0
  69. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/sales_navigator_crawler.py +0 -0
  70. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/salesforce_crm_tools.py +0 -0
  71. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/sendgrid_tools.py +0 -0
  72. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/serpapi_search_tools.py +0 -0
  73. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/test_connect.py +0 -0
  74. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/trasform_json.py +0 -0
  75. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/web_download_parse_tools.py +0 -0
  76. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/workflow_code_model.py +0 -0
  77. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/utils/zoominfo_tools.py +0 -0
  78. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/__init__.py +0 -0
  79. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/agent.py +0 -0
  80. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/flow.py +0 -0
  81. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/task.py +0 -0
  82. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana/workflow/test.py +0 -0
  83. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/SOURCES.txt +0 -0
  84. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/dependency_links.txt +0 -0
  85. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/entry_points.txt +0 -0
  86. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/requires.txt +0 -0
  87. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/src/dhisana.egg-info/top_level.txt +0 -0
  88. {dhisana-0.0.1.dev100 → dhisana-0.0.1.dev101}/tests/test_agent_tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dhisana
3
- Version: 0.0.1.dev100
3
+ Version: 0.0.1.dev101
4
4
  Summary: A Python SDK for Dhisana AI Platform
5
5
  Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
6
6
  Author: Admin
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='dhisana',
5
- version='0.0.1-dev100',
5
+ version='0.0.1-dev101',
6
6
  description='A Python SDK for Dhisana AI Platform',
7
7
  author='Admin',
8
8
  author_email='contact@dhisana.ai',
@@ -18,7 +18,10 @@ from dhisana.utils.field_validators import (
18
18
  normalize_linkedin_url,
19
19
  normalize_linkedin_company_url,
20
20
  normalize_salesnav_url,
21
- normalize_linkedin_company_salesnav_url
21
+ normalize_linkedin_company_salesnav_url,
22
+ validate_and_clean_email,
23
+ validation_organization_domain,
24
+ validate_website_url
22
25
  )
23
26
  from dhisana.utils.apollo_tools import enrich_user_info_with_apollo
24
27
  from dhisana.utils.assistant_tool_tag import assistant_tool
@@ -36,11 +39,6 @@ from dhisana.utils.serpapi_search_tools import (
36
39
  get_company_domain_from_google_search,
37
40
  get_company_website_from_linkedin_url,
38
41
  )
39
- from dhisana.utils.field_validators import (
40
- validate_and_clean_email,
41
- validation_organization_domain,
42
- validate_website_url
43
- )
44
42
 
45
43
  import logging
46
44
  logging.basicConfig(level=logging.INFO)
@@ -48,23 +46,58 @@ logger = logging.getLogger(__name__)
48
46
 
49
47
 
50
48
  # ----------------------------------------------------------------------
51
- # The enrichment tools that are permissible for usage.
49
+ # Allowed Enrichment Tools
52
50
  # ----------------------------------------------------------------------
53
51
  ALLOWED_ENRICHMENT_TOOLS = ["proxycurl", "apollo", "zoominfo"]
54
52
 
55
- # A map from tool name to the corresponding function that will enrich user info.
56
53
  USER_LOOKUP_TOOL_NAME_TO_FUNCTION_MAP = {
57
54
  "apollo": enrich_user_info_with_apollo,
58
55
  "proxycurl": enrich_user_info_with_proxy_curl,
59
56
  }
60
57
 
61
58
 
59
+ # ----------------------------------------------------------------------
60
+ # BasicLeadInformation model
61
+ # ----------------------------------------------------------------------
62
+ class BasicLeadInformation(BaseModel):
63
+ full_name: str = Field(..., description="Full name of the lead")
64
+ first_name: str = Field(..., description="First name of the lead")
65
+ last_name: str = Field(..., description="Last name of the lead")
66
+ email: str = Field(..., description="Email address of the lead")
67
+ primary_domain_of_organization: str = Field(..., description="Primary domain of the organization")
68
+ job_title: str = Field(..., description="Job Title of the lead")
69
+ phone: str = Field(..., description="Phone number of the lead")
70
+ headline: str = Field(..., description="Headline of the lead")
71
+ lead_location: str = Field(..., description="Location of the lead")
72
+ organization_name: str = Field(..., description="Current Company where lead works")
73
+ common_connections: int = Field(..., description="Number of common connections with the lead. Default 0")
74
+ followers_count: int = Field(..., description="Number of followers of the lead. Default 0")
75
+ tenure_in_current_role: str = Field(..., description="Tenure in the current role")
76
+ tenure_in_current_company: str = Field(..., description="Tenure in the current company")
77
+ connection_degree: str = Field(..., description="Degree of connection with the lead (1st, 2nd, 3rd)")
78
+ is_premium_account: bool = Field(..., description="Is the lead a premium account. Default is false.")
79
+ country_code: str = Field(..., description="Alpha-2 ISO3166 country code eg. US")
80
+
81
+
82
+ # ----------------------------------------------------------------------
83
+ # Helper: chunkify
84
+ # ----------------------------------------------------------------------
85
+ def chunkify(items: List[Any], chunk_size: int) -> List[List[Any]]:
86
+ """
87
+ Splits a list into sublists (chunks) of size `chunk_size`.
88
+ """
89
+ for i in range(0, len(items), chunk_size):
90
+ yield items[i : i + chunk_size]
91
+
92
+
93
+ # ----------------------------------------------------------------------
94
+ # Function: cleanup_user_name
95
+ # ----------------------------------------------------------------------
62
96
  def cleanup_user_name(cloned_properties: dict) -> dict:
63
97
  """
64
98
  Cleans up user name fields: 'full_name', 'first_name', 'last_name'.
65
99
  Returns the updated dictionary. If values are invalid or placeholders, sets them to ''.
66
100
  """
67
-
68
101
  if not isinstance(cloned_properties, dict):
69
102
  return {}
70
103
 
@@ -86,8 +119,9 @@ def cleanup_user_name(cloned_properties: dict) -> dict:
86
119
  stripped = stripped.split("|", 1)[0]
87
120
  # Remove extra non-alphanumeric characters (but allow whitespace)
88
121
  stripped = re.sub(r"[^a-zA-Z0-9\s]", "", stripped)
89
- # Capitalize first letter, lowercase the rest
90
- return stripped.strip().capitalize()
122
+
123
+ # Capitalize the first letter of each word, and lowercase the rest
124
+ return " ".join(word.capitalize() for word in stripped.strip().split())
91
125
 
92
126
  full_name = normalize(cloned_properties.get("full_name"))
93
127
  first_name = normalize(cloned_properties.get("first_name"))
@@ -100,41 +134,14 @@ def cleanup_user_name(cloned_properties: dict) -> dict:
100
134
  cloned_properties["full_name"] = full_name
101
135
  cloned_properties["first_name"] = first_name
102
136
  cloned_properties["last_name"] = last_name
137
+
103
138
  return cloned_properties
104
139
 
105
140
 
106
- class BasicLeadInformation(BaseModel):
107
- full_name: str = Field(..., description="Full name of the lead")
108
- first_name: str = Field(..., description="First name of the lead")
109
- last_name: str = Field(..., description="Last name of the lead")
110
- email: str = Field(..., description="Email address of the lead")
111
- primary_domain_of_organization: str = Field(..., description="Primary domain of the organization")
112
- job_title: str = Field(..., description="Job Title of the lead")
113
- phone: str = Field(..., description="Phone number of the lead")
114
- headline: str = Field(..., description="Headline of the lead")
115
- lead_location: str = Field(..., description="Location of the lead")
116
- organization_name: str = Field(..., description="Current Company where lead works")
117
- common_connections: int = Field(..., description="Number of common connections with the lead. Default 0")
118
- followers_count: int = Field(..., description="Number of followers of the lead. Default 0")
119
- tenure_in_current_role: str = Field(..., description="Tenure in the current role")
120
- tenure_in_current_company: str = Field(..., description="Tenure in the current company")
121
- connection_degree: str = Field(..., description="Degree of connection with the lead (1st, 2nd, 3rd)")
122
- is_premium_account: bool = Field(..., description="Is the lead a premium account. Default is false.")
123
- country_code: str = Field(..., description="Alpha-2 ISO3166 country code eg. US")
124
-
125
-
126
- def chunkify(items: List[Any], chunk_size: int) -> List[List[Any]]:
127
- """
128
- Splits a list into sublists (chunks) of size `chunk_size`.
129
- """
130
- for i in range(0, len(items), chunk_size):
131
- yield items[i : i + chunk_size]
132
-
133
-
134
141
  # ----------------------------------------------------------------------
135
142
  # LLM-based cleanup for single lead
136
143
  # ----------------------------------------------------------------------
137
- async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config) -> Dict[str, Any]:
144
+ async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config: Optional[dict]) -> Dict[str, Any]:
138
145
  """
139
146
  Takes a JSON string representation of partial lead info,
140
147
  returns a cleaned-up lead dictionary matching BasicLeadInformation fields.
@@ -163,21 +170,48 @@ async def get_clean_lead_info_with_llm(lead_info_str: str, tool_config) -> Dict[
163
170
  return lead_info.model_dump()
164
171
 
165
172
 
173
+ # ----------------------------------------------------------------------
174
+ # Helper: is_personal_email_domain
175
+ # ----------------------------------------------------------------------
176
+ def is_personal_email_domain(domain: str) -> bool:
177
+ """
178
+ Very simple check to see if the domain is one of the common free/personal
179
+ email providers. Could expand this list or integrate a third-party API
180
+ for more accuracy.
181
+ """
182
+ common_free_domains = {
183
+ "gmail.com", "yahoo.com", "hotmail.com", "outlook.com",
184
+ "protonmail.com", "icloud.com", "aol.com", "mail.com",
185
+ "pm.me", "yandex.com", "gmx.com"
186
+ }
187
+ domain = domain.strip().lower()
188
+ return (domain in common_free_domains) or domain.endswith(".edu")
189
+
190
+
166
191
  # ----------------------------------------------------------------------
167
192
  # Main validation & cleanup function
168
193
  # ----------------------------------------------------------------------
169
- async def validate_and_cleanup(cloned_properties: dict, tool_config: Optional[dict] = None) -> dict:
194
+ async def validate_and_cleanup(
195
+ cloned_properties: dict,
196
+ tool_config: Optional[dict] = None,
197
+ use_strict_check: bool = False
198
+ ) -> dict:
170
199
  """
171
200
  Wrapper to validate & normalize various properties in a dictionary.
172
- If `full_name`, `first_name`, or `last_name` is under 3 characters or contains
173
- special characters, we fallback to `get_clean_lead_info_with_llm`.
174
- Otherwise, we use the cheaper `cleanup_user_name`.
201
+
202
+ 1) Clean up/validate typical fields.
203
+ 2) If name fields appear invalid, fallback to LLM-based name inference.
204
+ 3) If 'primary_domain_of_organization' AND 'organization_website' are both empty,
205
+ but there's a valid corporate email, use that as the domain.
206
+ 4) (Optional) Enrich the organization info from the name if needed.
175
207
  """
176
208
 
177
209
  if not isinstance(cloned_properties, dict):
178
210
  return {}
179
211
 
180
- # Normalize typical fields first
212
+ # ------------------------------------------------------------------
213
+ # Step 1: Normalize typical fields
214
+ # ------------------------------------------------------------------
181
215
  cloned_properties["user_linkedin_url"] = normalize_linkedin_url(
182
216
  cloned_properties.get("user_linkedin_url")
183
217
  )
@@ -203,19 +237,19 @@ async def validate_and_cleanup(cloned_properties: dict, tool_config: Optional[di
203
237
  cloned_properties.get("organization_name")
204
238
  )
205
239
 
206
- # Now decide if we need LLM-based cleanup for the name fields
240
+ # ------------------------------------------------------------------
241
+ # Step 2: Basic name-check. If invalid => LLM fallback.
242
+ # ------------------------------------------------------------------
207
243
  def has_special_characters(val: str) -> bool:
208
244
  return bool(re.search(r"[^a-zA-Z0-9\s]", val))
209
245
 
210
246
  def is_invalid_name(val: str) -> bool:
211
- """Invalid if < 3 chars OR contains any special characters."""
212
- return len(val.strip()) < 3 or has_special_characters(val)
247
+ return (len(val.strip()) < 3) or has_special_characters(val)
213
248
 
214
249
  full_name = cloned_properties.get("full_name", "")
215
250
  first_name = cloned_properties.get("first_name", "")
216
251
  last_name = cloned_properties.get("last_name", "")
217
252
 
218
- # If ANY are invalid => Use get_clean_lead_info_with_llm
219
253
  if (
220
254
  is_invalid_name(full_name)
221
255
  or is_invalid_name(first_name)
@@ -223,19 +257,44 @@ async def validate_and_cleanup(cloned_properties: dict, tool_config: Optional[di
223
257
  ):
224
258
  lead_info_str = str(cloned_properties)
225
259
  logger.info(
226
- "Detected invalid name fields, using LLM to infer/correct name fields."
260
+ "Detected invalid name fields. Using LLM to infer/correct name fields."
227
261
  )
228
262
  # Attempt LLM-based cleanup
229
263
  new_lead_info = await get_clean_lead_info_with_llm(lead_info_str, tool_config=tool_config)
230
264
  if new_lead_info:
231
- # Just replace relevant name fields if returned from LLM
232
265
  cloned_properties["full_name"] = new_lead_info.get("full_name", "")
233
266
  cloned_properties["first_name"] = new_lead_info.get("first_name", "")
234
267
  cloned_properties["last_name"] = new_lead_info.get("last_name", "")
235
268
  else:
236
- # Use cheaper logic
269
+ # Use the cheaper logic
237
270
  cloned_properties = cleanup_user_name(cloned_properties)
238
271
 
272
+ # ------------------------------------------------------------------
273
+ # Step 3: If domain & website are empty but there's a corporate email
274
+ # ------------------------------------------------------------------
275
+ # - If email is present, check if domain is personal or corporate
276
+ # - If corporate, set primary_domain_of_organization from email domain
277
+ # ------------------------------------------------------------------
278
+ domain_empty = not cloned_properties.get("primary_domain_of_organization")
279
+ website_empty = not cloned_properties.get("organization_website")
280
+ email = cloned_properties.get("email", "")
281
+
282
+ if domain_empty and website_empty and email:
283
+ # parse domain from email
284
+ extracted_domain = email.split("@")[-1].strip().lower()
285
+ if extracted_domain and (not is_personal_email_domain(extracted_domain)):
286
+ # This is a "corporate" email domain, so use it
287
+ cloned_properties["primary_domain_of_organization"] = extracted_domain
288
+ cloned_properties["organization_website"] = f"https://www.{extracted_domain}"
289
+ logger.info("Set primary_domain_of_organization from corporate email domain.")
290
+
291
+ if domain_empty and not website_empty:
292
+ from urllib.parse import urlparse
293
+ parsed_website = urlparse(cloned_properties["organization_website"])
294
+ possible_domain = parsed_website.netloc.replace("www.", "")
295
+ if possible_domain:
296
+ cloned_properties["primary_domain_of_organization"] = possible_domain
297
+ logger.info("Set primary_domain_of_organization from organization_website domain.")
239
298
  return cloned_properties
240
299
 
241
300
  @assistant_tool
@@ -266,7 +325,7 @@ async def enrich_lead_information(
266
325
 
267
326
  cloned_properties = await enrich_with_provider(cloned_properties, tool_config)
268
327
 
269
- await set_organization_domain(
328
+ await enrich_organization_info_from_name(
270
329
  row=cloned_properties,
271
330
  use_strict_check=use_strict_check,
272
331
  tool_config=tool_config,
@@ -473,7 +532,6 @@ async def enrich_organization_info_from_name(
473
532
  """
474
533
  org_name_key = "organization_name"
475
534
  org_domain_key = "primary_domain_of_organization"
476
- linkedin_url_key = "organization_linkedin_url"
477
535
  website_key = "organization_website"
478
536
 
479
537
  org_name = (row.get(org_name_key) or "").strip()
@@ -489,36 +547,7 @@ async def enrich_organization_info_from_name(
489
547
  # If domain or website is already present, we consider it enriched
490
548
  if row.get(org_domain_key) or row.get(website_key):
491
549
  return
492
-
493
- # Try to fetch additional company info (domain, website, maybe LinkedIn)
494
- company_info = await get_company_domain_from_llm_web_search(
495
- company_name=org_name,
496
- lead_info=row,
497
- location="US",
498
- tool_config=tool_config
499
- )
500
-
501
- if company_info and isinstance(company_info, dict):
502
- # If the LLM found a domain, set it
503
- if company_info.get("primary_domain_of_organization"):
504
- row[org_domain_key] = company_info["primary_domain_of_organization"]
505
-
506
- # If the LLM found an organization website, set it
507
- if company_info.get("organization_website"):
508
- row[website_key] = company_info["organization_website"]
509
-
510
- # If there's a LinkedIn URL from LLM, set it
511
- if company_info.get("organization_linkedin_url") and not row[linkedin_url_key]:
512
- row[linkedin_url_key] = company_info["organization_linkedin_url"]
513
-
514
- if company_info.get("organization_name") and not row[org_name_key]:
515
- row[org_name_key] = company_info["organization_name"]
516
-
517
- # After setting them, run set_organization_domain to finalize or clean up domain
518
- await set_organization_domain(row, use_strict_check, tool_config)
519
- else:
520
- # If LLM didn't return anything, at least set the domain to empty
521
- row[org_domain_key] = ""
550
+ await set_organization_domain(row, use_strict_check, tool_config)
522
551
 
523
552
 
524
553
  async def set_organization_domain(
@@ -567,8 +596,21 @@ async def set_organization_domain(
567
596
  location="US",
568
597
  tool_config=tool_config
569
598
  )
570
- extracted_domain = company_info.get("primary_domain_of_organization", "")
571
- logger.debug("Found domain from Google search: %s", extracted_domain)
599
+ if company_info and isinstance(company_info, dict):
600
+ # If the LLM found a domain, set it
601
+ if company_info.get("primary_domain_of_organization") and not row[org_domain_key]:
602
+ row[org_domain_key] = company_info["primary_domain_of_organization"]
603
+
604
+ # If the LLM found an organization website, set it
605
+ if company_info.get("organization_website") and not row[website_key]:
606
+ row[website_key] = company_info["organization_website"]
607
+
608
+ # If there's a LinkedIn URL from LLM, set it
609
+ if company_info.get("organization_linkedin_url") and not row[linkedin_url_key]:
610
+ row[linkedin_url_key] = company_info["organization_linkedin_url"]
611
+
612
+ if company_info.get("organization_name") and not row[org_name_key]:
613
+ row[org_name_key] = company_info["organization_name"]
572
614
 
573
615
  row[org_domain_key] = extracted_domain or ""
574
616
  logger.debug("Final domain selected: %s", row[org_domain_key])
@@ -726,7 +768,9 @@ async def get_company_domain_from_llm_web_search(
726
768
  {lead_info}
727
769
 
728
770
  Search and gather any domain/website info or LinkedIn details.
729
-
771
+ DO NOT make up information about company.
772
+ Find based on the domain in the leads email if its a corporate email, company name if sepcified to find the company name, website and domain.
773
+
730
774
  **Output**:
731
775
  Return your final output as valid JSON with the following structure:
732
776
  {{
@@ -5,6 +5,7 @@ from dhisana.utils.clean_properties import cleanup_email_context
5
5
  from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
6
6
 
7
7
  def clean_nul_bytes(s: str) -> str:
8
+ s = s.replace('```markdown', '')
8
9
  return s.replace('\x00', '')
9
10
 
10
11
  class LeadResearchInformation(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dhisana
3
- Version: 0.0.1.dev100
3
+ Version: 0.0.1.dev101
4
4
  Summary: A Python SDK for Dhisana AI Platform
5
5
  Home-page: https://github.com/dhisana-ai/dhisana-python-sdk
6
6
  Author: Admin
File without changes
File without changes