dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +33 -0
- dhisana/schemas/sales.py +224 -23
- dhisana/utils/add_mapping.py +72 -63
- dhisana/utils/apollo_tools.py +739 -109
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/cache_output_tools.py +23 -23
- dhisana/utils/check_email_validity_tools.py +456 -458
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +3 -1
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +585 -85
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +69 -16
- dhisana/utils/generate_email_response.py +298 -41
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +19 -6
- dhisana/utils/generate_linkedin_response_message.py +156 -65
- dhisana/utils/generate_structured_output_internal.py +351 -131
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +391 -25
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +771 -167
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +19 -16
- dhisana/utils/parse_linkedin_messages_txt.py +2 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +507 -206
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +121 -68
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +363 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +576 -0
- dhisana/utils/test_connect.py +1765 -92
- dhisana/utils/trasform_json.py +95 -16
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
|
@@ -1,407 +1,481 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Email enrichment & validation module
|
|
3
|
+
|
|
4
|
+
Adds Findymail support on top of existing ZeroBounce, Hunter and Apollo flows.
|
|
5
|
+
|
|
6
|
+
Providers supported
|
|
7
|
+
-------------------
|
|
8
|
+
* Findymail – email finder (`/search/name`) & verifier (`/verify`)
|
|
9
|
+
* Hunter – email finder (`/email-finder`) & verifier (`/email-verifier`)
|
|
10
|
+
* ZeroBounce – guess format (`/guessformat`) & verifier (`/validate`)
|
|
11
|
+
* Apollo – enrichment fallback (re‑checked with ZeroBounce/Hunter)
|
|
12
|
+
|
|
13
|
+
Priority order
|
|
14
|
+
--------------
|
|
15
|
+
Validation: Findymail → Hunter → ZeroBounce
|
|
16
|
+
Guess/find: Findymail → Hunter → ZeroBounce → Apollo
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
1
21
|
import os
|
|
2
22
|
import json
|
|
3
23
|
import logging
|
|
24
|
+
import re
|
|
4
25
|
from typing import Dict, List, Optional, Any
|
|
26
|
+
|
|
5
27
|
import aiohttp
|
|
6
28
|
|
|
29
|
+
# ────────────────────────────────────────────────────────────────────────────
|
|
30
|
+
# Dhisana utility imports
|
|
31
|
+
# ────────────────────────────────────────────────────────────────────────────
|
|
7
32
|
from dhisana.schemas.sales import HubSpotLeadInformation
|
|
8
33
|
from dhisana.utils.field_validators import validate_and_clean_email
|
|
9
|
-
from dhisana.utils.hubspot_crm_tools import lookup_contact_by_name_and_domain
|
|
10
|
-
|
|
11
|
-
logger = logging.getLogger(__name__)
|
|
12
34
|
from dhisana.utils.apollo_tools import enrich_user_info_with_apollo
|
|
13
35
|
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
14
36
|
from dhisana.utils.cache_output_tools import cache_output, retrieve_output
|
|
15
37
|
|
|
16
|
-
|
|
17
|
-
# 1. Access Token Helpers
|
|
18
|
-
# --------------------------------------------------------------------------------
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
19
39
|
|
|
20
|
-
|
|
40
|
+
# ===========================================================================
|
|
41
|
+
# 0. FINDYMAIL HELPERS
|
|
42
|
+
# ===========================================================================
|
|
43
|
+
FINDYMAIL_BASE_URL = "https://app.findymail.com/api"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_findymail_access_token(tool_config: Optional[List[Dict]] = None) -> str:
|
|
21
47
|
"""
|
|
22
|
-
|
|
48
|
+
Retrieve the Findymail API key either from tool_config or environment.
|
|
49
|
+
Tool‑config JSON shape expected:
|
|
50
|
+
{
|
|
51
|
+
"name": "findymail",
|
|
52
|
+
"configuration": [
|
|
53
|
+
{"name": "apiKey", "value": "<API_KEY>"}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
23
56
|
"""
|
|
24
57
|
if tool_config:
|
|
25
|
-
|
|
26
|
-
(item for item in tool_config if item.get("name") == "
|
|
58
|
+
fm_cfg = next(
|
|
59
|
+
(item for item in tool_config if item.get("name") == "findymail"), None
|
|
27
60
|
)
|
|
28
|
-
if
|
|
29
|
-
|
|
30
|
-
c["name"]: c["value"]
|
|
31
|
-
for c in zerobounce_config.get("configuration", [])
|
|
32
|
-
if c
|
|
61
|
+
if fm_cfg:
|
|
62
|
+
cfg_map = {
|
|
63
|
+
c["name"]: c["value"] for c in fm_cfg.get("configuration", []) if c
|
|
33
64
|
}
|
|
34
|
-
|
|
65
|
+
api_key = cfg_map.get("apiKey")
|
|
35
66
|
else:
|
|
36
|
-
|
|
37
|
-
ZERO_BOUNCE_API_KEY = None
|
|
67
|
+
api_key = None
|
|
38
68
|
else:
|
|
39
|
-
|
|
40
|
-
|
|
69
|
+
api_key = None
|
|
70
|
+
|
|
71
|
+
api_key = api_key or os.getenv("FINDYMAIL_API_KEY")
|
|
72
|
+
if not api_key:
|
|
73
|
+
logger.warning(
|
|
74
|
+
"Findymail integration is not configured. Please configure the connection to Findymail in Integrations."
|
|
75
|
+
)
|
|
76
|
+
return ""
|
|
77
|
+
return api_key
|
|
41
78
|
|
|
42
|
-
ZERO_BOUNCE_API_KEY = ZERO_BOUNCE_API_KEY or os.getenv("ZERO_BOUNCE_API_KEY")
|
|
43
|
-
if not ZERO_BOUNCE_API_KEY:
|
|
44
|
-
logger.warning("ZERO_BOUNCE_API_KEY not found in config or env.")
|
|
45
|
-
return "" # Return empty so we don't break
|
|
46
79
|
|
|
47
|
-
|
|
80
|
+
# ===========================================================================
|
|
81
|
+
# 1. ACCESS‑TOKEN HELPERS FOR EXISTING PROVIDERS
|
|
82
|
+
# ===========================================================================
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_zero_bounce_access_token(tool_config: Optional[List[Dict]] = None) -> str:
|
|
86
|
+
"""Retrieve ZeroBounce key from config/env."""
|
|
87
|
+
if tool_config:
|
|
88
|
+
zb_cfg = next(
|
|
89
|
+
(item for item in tool_config if item.get("name") == "zerobounce"), None
|
|
90
|
+
)
|
|
91
|
+
if zb_cfg:
|
|
92
|
+
cfg_map = {
|
|
93
|
+
c["name"]: c["value"] for c in zb_cfg.get("configuration", []) if c
|
|
94
|
+
}
|
|
95
|
+
api_key = cfg_map.get("apiKey")
|
|
96
|
+
else:
|
|
97
|
+
api_key = None
|
|
98
|
+
else:
|
|
99
|
+
api_key = None
|
|
100
|
+
|
|
101
|
+
api_key = api_key or os.getenv("ZERO_BOUNCE_API_KEY")
|
|
102
|
+
if not api_key:
|
|
103
|
+
logger.warning(
|
|
104
|
+
"ZeroBounce integration is not configured. Please configure the connection to ZeroBounce in Integrations."
|
|
105
|
+
)
|
|
106
|
+
return ""
|
|
107
|
+
return api_key
|
|
48
108
|
|
|
49
109
|
|
|
50
110
|
def get_hunter_access_token(tool_config: Optional[List[Dict]] = None) -> str:
|
|
51
|
-
"""
|
|
52
|
-
Retrieves the Hunter.io access token from the provided tool configuration or environment.
|
|
53
|
-
"""
|
|
111
|
+
"""Retrieve Hunter.io key from config/env."""
|
|
54
112
|
if tool_config:
|
|
55
|
-
|
|
113
|
+
h_cfg = next(
|
|
56
114
|
(item for item in tool_config if item.get("name") == "hunter"), None
|
|
57
115
|
)
|
|
58
|
-
if
|
|
59
|
-
|
|
60
|
-
c["name"]: c["value"]
|
|
61
|
-
for c in hunter_config.get("configuration", [])
|
|
62
|
-
if c
|
|
116
|
+
if h_cfg:
|
|
117
|
+
cfg_map = {
|
|
118
|
+
c["name"]: c["value"] for c in h_cfg.get("configuration", []) if c
|
|
63
119
|
}
|
|
64
|
-
|
|
120
|
+
api_key = cfg_map.get("apiKey")
|
|
65
121
|
else:
|
|
66
|
-
|
|
67
|
-
HUNTER_API_KEY = None
|
|
122
|
+
api_key = None
|
|
68
123
|
else:
|
|
69
|
-
|
|
70
|
-
HUNTER_API_KEY = None
|
|
124
|
+
api_key = None
|
|
71
125
|
|
|
72
|
-
|
|
73
|
-
if not
|
|
74
|
-
logger.warning(
|
|
75
|
-
|
|
126
|
+
api_key = api_key or os.getenv("HUNTER_API_KEY")
|
|
127
|
+
if not api_key:
|
|
128
|
+
logger.warning(
|
|
129
|
+
"Hunter integration is not configured. Please configure the connection to Hunter in Integrations."
|
|
130
|
+
)
|
|
131
|
+
return ""
|
|
132
|
+
return api_key
|
|
76
133
|
|
|
77
|
-
return HUNTER_API_KEY
|
|
78
134
|
|
|
135
|
+
# ===========================================================================
|
|
136
|
+
# 2. VALIDATION FUNCTIONS
|
|
137
|
+
# ===========================================================================
|
|
79
138
|
|
|
80
|
-
# --------------------------------------------------------------------------------
|
|
81
|
-
# 2. Provider-Specific Validation Functions
|
|
82
|
-
# --------------------------------------------------------------------------------
|
|
83
139
|
|
|
84
|
-
|
|
140
|
+
@assistant_tool
|
|
141
|
+
async def check_email_validity_with_findymail(
|
|
142
|
+
email_id: str,
|
|
143
|
+
tool_config: Optional[List[Dict]] = None,
|
|
144
|
+
) -> Dict[str, Any]:
|
|
85
145
|
"""
|
|
86
|
-
|
|
146
|
+
Validate deliverability using Findymail `/verify` endpoint.
|
|
147
|
+
|
|
148
|
+
Returns
|
|
149
|
+
-------
|
|
150
|
+
{
|
|
151
|
+
"email": str,
|
|
152
|
+
"confidence": "high" | "low",
|
|
153
|
+
"is_valid": bool
|
|
154
|
+
}
|
|
87
155
|
"""
|
|
156
|
+
logger.info("Entering check_email_validity_with_findymail: %s", email_id)
|
|
157
|
+
|
|
158
|
+
if not email_id or not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", email_id):
|
|
159
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
160
|
+
|
|
161
|
+
cache_key = f"findymail:{email_id}"
|
|
162
|
+
cached = retrieve_output("findymail_validate", cache_key)
|
|
163
|
+
if cached:
|
|
164
|
+
return json.loads(cached[0])
|
|
165
|
+
|
|
166
|
+
api_key = get_findymail_access_token(tool_config)
|
|
167
|
+
if not api_key:
|
|
168
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
169
|
+
|
|
170
|
+
url = f"{FINDYMAIL_BASE_URL}/verify"
|
|
171
|
+
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
|
|
172
|
+
|
|
173
|
+
try:
|
|
174
|
+
async with aiohttp.ClientSession() as session:
|
|
175
|
+
async with session.post(url, json={"email": email_id}, headers=headers) as r:
|
|
176
|
+
if r.status != 200:
|
|
177
|
+
logger.warning("[Findymail] verify non‑200: %s", r.status)
|
|
178
|
+
result = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
179
|
+
else:
|
|
180
|
+
data = await r.json()
|
|
181
|
+
verified = bool(data.get("verified") or data.get("result") == "verified")
|
|
182
|
+
result = {
|
|
183
|
+
"email": email_id,
|
|
184
|
+
"confidence": "high" if verified else "low",
|
|
185
|
+
"is_valid": verified,
|
|
186
|
+
}
|
|
187
|
+
except Exception as ex:
|
|
188
|
+
logger.exception("[Findymail] verify exception: %s", ex)
|
|
189
|
+
result = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
190
|
+
|
|
191
|
+
cache_output("findymail_validate", cache_key, [json.dumps(result)])
|
|
192
|
+
return result
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# ───── ZeroBounce mapping/validation ───────────────────────────────────────
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _map_zerobounce_status_to_confidence(status: str) -> str:
|
|
88
199
|
status = status.lower()
|
|
89
200
|
if status == "valid":
|
|
90
201
|
return "high"
|
|
91
|
-
|
|
202
|
+
if status in ("catch-all", "unknown"):
|
|
92
203
|
return "medium"
|
|
93
|
-
elif status in ["spamtrap", "invalid"]:
|
|
94
|
-
return "low"
|
|
95
204
|
return "low"
|
|
96
205
|
|
|
97
206
|
|
|
98
207
|
@assistant_tool
|
|
99
208
|
async def check_email_validity_with_zero_bounce(
|
|
100
209
|
email_id: str,
|
|
101
|
-
tool_config: Optional[List[Dict]] = None
|
|
210
|
+
tool_config: Optional[List[Dict]] = None,
|
|
102
211
|
) -> Dict[str, Any]:
|
|
103
|
-
""
|
|
104
|
-
Validate a single email address using the ZeroBounce API, with caching.
|
|
105
|
-
Returns: {
|
|
106
|
-
"email": str,
|
|
107
|
-
"confidence": "high"|"medium"|"low",
|
|
108
|
-
"is_valid": bool
|
|
109
|
-
}
|
|
110
|
-
"""
|
|
111
|
-
logger.info("Entering check_email_validity_with_zero_bounce for email_id: %s", email_id)
|
|
112
|
-
import re
|
|
212
|
+
logger.info("Entering check_email_validity_with_zero_bounce: %s", email_id)
|
|
113
213
|
if not email_id or not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", email_id):
|
|
114
|
-
return {
|
|
115
|
-
"email": email_id,
|
|
116
|
-
"confidence": "low",
|
|
117
|
-
"is_valid": False
|
|
118
|
-
}
|
|
214
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
119
215
|
|
|
120
|
-
cache_key = f"{email_id}"
|
|
121
|
-
|
|
122
|
-
if
|
|
123
|
-
|
|
124
|
-
if not cached_response:
|
|
125
|
-
return {
|
|
126
|
-
"email": email_id,
|
|
127
|
-
"confidence": "low",
|
|
128
|
-
"is_valid": False
|
|
129
|
-
}
|
|
130
|
-
return json.loads(cached_response[0])
|
|
131
|
-
|
|
132
|
-
# Get API key
|
|
133
|
-
ZERO_BOUNCE_API_KEY = get_zero_bounce_access_token(tool_config)
|
|
134
|
-
if not ZERO_BOUNCE_API_KEY:
|
|
135
|
-
logger.warning("No ZeroBounce API key available. Returning low confidence.")
|
|
136
|
-
return {
|
|
137
|
-
"email": email_id,
|
|
138
|
-
"confidence": "low",
|
|
139
|
-
"is_valid": False
|
|
140
|
-
}
|
|
216
|
+
cache_key = f"zerobounce:{email_id}"
|
|
217
|
+
cached = retrieve_output("zerobounce_validate", cache_key)
|
|
218
|
+
if cached:
|
|
219
|
+
return json.loads(cached[0])
|
|
141
220
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
)
|
|
221
|
+
api_key = get_zero_bounce_access_token(tool_config)
|
|
222
|
+
if not api_key:
|
|
223
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
146
224
|
|
|
225
|
+
url = f"https://api.zerobounce.net/v2/validate?api_key={api_key}&email={email_id}"
|
|
147
226
|
try:
|
|
148
227
|
async with aiohttp.ClientSession() as session:
|
|
149
|
-
async with session.get(url) as
|
|
150
|
-
if
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
)
|
|
155
|
-
|
|
156
|
-
|
|
228
|
+
async with session.get(url) as r:
|
|
229
|
+
if r.status != 200:
|
|
230
|
+
logger.warning("[ZeroBounce] non‑200: %s", r.status)
|
|
231
|
+
result = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
232
|
+
else:
|
|
233
|
+
data = await r.json()
|
|
234
|
+
conf = _map_zerobounce_status_to_confidence(data.get("status", ""))
|
|
235
|
+
result = {
|
|
157
236
|
"email": email_id,
|
|
158
|
-
"confidence":
|
|
159
|
-
"is_valid":
|
|
237
|
+
"confidence": conf,
|
|
238
|
+
"is_valid": conf == "high",
|
|
160
239
|
}
|
|
161
|
-
cache_output("zerobounce_validate", cache_key, [json.dumps(final_response)])
|
|
162
|
-
return final_response
|
|
163
|
-
|
|
164
|
-
result = await response.json()
|
|
165
240
|
except Exception as ex:
|
|
166
|
-
logger.
|
|
167
|
-
|
|
168
|
-
"email": email_id,
|
|
169
|
-
"confidence": "low",
|
|
170
|
-
"is_valid": False
|
|
171
|
-
}
|
|
241
|
+
logger.exception("[ZeroBounce] validate exception: %s", ex)
|
|
242
|
+
result = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
172
243
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
is_valid = (confidence == "high")
|
|
244
|
+
cache_output("zerobounce_validate", cache_key, [json.dumps(result)])
|
|
245
|
+
return result
|
|
176
246
|
|
|
177
|
-
final_response = {
|
|
178
|
-
"email": email_id,
|
|
179
|
-
"confidence": confidence,
|
|
180
|
-
"is_valid": is_valid
|
|
181
|
-
}
|
|
182
|
-
cache_output("zerobounce_validate", cache_key, [json.dumps(final_response)])
|
|
183
|
-
logger.info("Exiting check_email_validity_with_zero_bounce.")
|
|
184
|
-
return final_response
|
|
185
247
|
|
|
248
|
+
# ───── Hunter mapping/validation ───────────────────────────────────────────
|
|
186
249
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
"""
|
|
192
|
-
val = hunter_result.lower()
|
|
193
|
-
if val == "deliverable":
|
|
250
|
+
|
|
251
|
+
def _map_hunter_status_to_confidence(status: str) -> str:
|
|
252
|
+
status = status.lower()
|
|
253
|
+
if status == "deliverable":
|
|
194
254
|
return "high"
|
|
195
|
-
|
|
255
|
+
if status in ("unknown", "accept_all"):
|
|
196
256
|
return "medium"
|
|
197
|
-
elif val == "undeliverable":
|
|
198
|
-
return "low"
|
|
199
257
|
return "low"
|
|
200
258
|
|
|
201
259
|
|
|
202
260
|
@assistant_tool
|
|
203
261
|
async def check_email_validity_with_hunter(
|
|
204
262
|
email_id: str,
|
|
205
|
-
tool_config: Optional[List[Dict]] = None
|
|
263
|
+
tool_config: Optional[List[Dict]] = None,
|
|
206
264
|
) -> Dict[str, Any]:
|
|
207
|
-
""
|
|
208
|
-
Validate a single email address using Hunter.io's email verification API.
|
|
209
|
-
Returns: {
|
|
210
|
-
"email": str,
|
|
211
|
-
"confidence": "high"|"medium"|"low",
|
|
212
|
-
"is_valid": bool
|
|
213
|
-
}
|
|
214
|
-
"""
|
|
215
|
-
logger.info("Entering check_email_validity_with_hunter for email_id: %s", email_id)
|
|
216
|
-
import re
|
|
265
|
+
logger.info("Entering check_email_validity_with_hunter: %s", email_id)
|
|
217
266
|
if not email_id or not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", email_id):
|
|
218
|
-
return {
|
|
219
|
-
"email": email_id,
|
|
220
|
-
"confidence": "low",
|
|
221
|
-
"is_valid": False
|
|
222
|
-
}
|
|
267
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
223
268
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
return
|
|
228
|
-
"email": email_id,
|
|
229
|
-
"confidence": "low",
|
|
230
|
-
"is_valid": False
|
|
231
|
-
}
|
|
269
|
+
cache_key = f"hunter:{email_id}"
|
|
270
|
+
cached = retrieve_output("hunter_validate", cache_key)
|
|
271
|
+
if cached:
|
|
272
|
+
return json.loads(cached[0])
|
|
232
273
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
)
|
|
274
|
+
api_key = get_hunter_access_token(tool_config)
|
|
275
|
+
if not api_key:
|
|
276
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
237
277
|
|
|
278
|
+
url = f"https://api.hunter.io/v2/email-verifier?email={email_id}&api_key={api_key}"
|
|
238
279
|
try:
|
|
239
280
|
async with aiohttp.ClientSession() as session:
|
|
240
|
-
async with session.get(url) as
|
|
241
|
-
if
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
281
|
+
async with session.get(url) as r:
|
|
282
|
+
if r.status != 200:
|
|
283
|
+
logger.warning("[Hunter] non‑200: %s", r.status)
|
|
284
|
+
result = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
285
|
+
else:
|
|
286
|
+
data = await r.json()
|
|
287
|
+
res = data.get("data", {}).get("result", "")
|
|
288
|
+
conf = _map_hunter_status_to_confidence(res)
|
|
289
|
+
result = {
|
|
245
290
|
"email": email_id,
|
|
246
|
-
"confidence":
|
|
247
|
-
"is_valid":
|
|
291
|
+
"confidence": conf,
|
|
292
|
+
"is_valid": conf == "high",
|
|
248
293
|
}
|
|
294
|
+
except Exception as ex:
|
|
295
|
+
logger.exception("[Hunter] validate exception: %s", ex)
|
|
296
|
+
result = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
297
|
+
|
|
298
|
+
cache_output("hunter_validate", cache_key, [json.dumps(result)])
|
|
299
|
+
return result
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# ===========================================================================
|
|
303
|
+
# 3. GUESS / FIND FUNCTIONS
|
|
304
|
+
# ===========================================================================
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@assistant_tool
|
|
308
|
+
async def guess_email_with_findymail(
|
|
309
|
+
first_name: str,
|
|
310
|
+
last_name: str,
|
|
311
|
+
domain: str,
|
|
312
|
+
user_linkedin_url: Optional[str] = None,
|
|
313
|
+
middle_name: Optional[str] = None,
|
|
314
|
+
tool_config: Optional[List[Dict]] = None,
|
|
315
|
+
) -> Dict[str, Any]:
|
|
316
|
+
"""Use Findymail to guess an email.
|
|
317
|
+
|
|
318
|
+
If ``user_linkedin_url`` is provided, the function queries ``/search/linkedin``.
|
|
319
|
+
Otherwise it falls back to ``/search/name`` with ``first_name``/``last_name``
|
|
320
|
+
and ``domain``. Only verified emails are returned and therefore considered
|
|
321
|
+
high confidence.
|
|
322
|
+
"""
|
|
323
|
+
logger.info("Entering guess_email_with_findymail")
|
|
324
|
+
|
|
325
|
+
if user_linkedin_url:
|
|
326
|
+
cache_key = f"findymail:{user_linkedin_url}"
|
|
327
|
+
else:
|
|
328
|
+
if not first_name or not last_name or not domain:
|
|
329
|
+
return {"email": "", "email_confidence": "low"}
|
|
330
|
+
cache_key = f"findymail:{first_name}_{last_name}_{domain}"
|
|
331
|
+
|
|
332
|
+
api_key = get_findymail_access_token(tool_config)
|
|
333
|
+
if not api_key:
|
|
334
|
+
return {"email": "", "email_confidence": "low"}
|
|
335
|
+
|
|
336
|
+
cached = retrieve_output("findymail_guess", cache_key)
|
|
337
|
+
if cached:
|
|
338
|
+
return json.loads(cached[0])
|
|
339
|
+
|
|
340
|
+
if user_linkedin_url:
|
|
341
|
+
url = f"{FINDYMAIL_BASE_URL}/search/linkedin"
|
|
342
|
+
payload = {"linkedin_url": user_linkedin_url, "webhook_url": None}
|
|
343
|
+
else:
|
|
344
|
+
url = f"{FINDYMAIL_BASE_URL}/search/name"
|
|
345
|
+
full_name = " ".join(filter(None, [first_name, middle_name, last_name]))
|
|
346
|
+
payload = {"name": full_name, "domain": domain}
|
|
347
|
+
|
|
348
|
+
headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
|
|
249
349
|
|
|
250
|
-
|
|
350
|
+
try:
|
|
351
|
+
async with aiohttp.ClientSession() as session:
|
|
352
|
+
async with session.post(url, headers=headers, json=payload) as r:
|
|
353
|
+
if r.status != 200:
|
|
354
|
+
logger.warning("[Findymail] search non‑200: %s", r.status)
|
|
355
|
+
result = {"email": "", "email_confidence": "low"}
|
|
356
|
+
else:
|
|
357
|
+
data = await r.json()
|
|
358
|
+
contact = data.get("contact")
|
|
359
|
+
found = contact.get("email", "") if contact else ""
|
|
360
|
+
if found:
|
|
361
|
+
result = {
|
|
362
|
+
"email": found,
|
|
363
|
+
"email_confidence": "high",
|
|
364
|
+
"contact_info": json.dumps(contact) if contact else "",
|
|
365
|
+
}
|
|
366
|
+
else:
|
|
367
|
+
result = {"email": "", "email_confidence": "low"}
|
|
251
368
|
except Exception as ex:
|
|
252
|
-
logger.
|
|
253
|
-
|
|
254
|
-
"email": email_id,
|
|
255
|
-
"confidence": "low",
|
|
256
|
-
"is_valid": False
|
|
257
|
-
}
|
|
369
|
+
logger.exception("[Findymail] search exception: %s", ex)
|
|
370
|
+
result = {"email": "", "email_confidence": "low"}
|
|
258
371
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
confidence = _map_hunter_status_to_confidence(verifier_result)
|
|
262
|
-
is_valid = (confidence == "high")
|
|
372
|
+
cache_output("findymail_guess", cache_key, [json.dumps(result)])
|
|
373
|
+
return result
|
|
263
374
|
|
|
264
|
-
final_response = {
|
|
265
|
-
"email": email_id,
|
|
266
|
-
"confidence": confidence,
|
|
267
|
-
"is_valid": is_valid
|
|
268
|
-
}
|
|
269
|
-
logger.info("Exiting check_email_validity_with_hunter.")
|
|
270
|
-
return final_response
|
|
271
375
|
|
|
376
|
+
# ───── ZeroBounce guess ────────────────────────────────────────────────────
|
|
272
377
|
|
|
273
|
-
# --------------------------------------------------------------------------------
|
|
274
|
-
# 3. Provider-Specific Guessing Functions
|
|
275
|
-
# --------------------------------------------------------------------------------
|
|
276
378
|
|
|
277
379
|
@assistant_tool
|
|
278
380
|
async def guess_email_with_zero_bounce(
|
|
279
381
|
first_name: str,
|
|
280
382
|
last_name: str,
|
|
281
383
|
domain: str,
|
|
282
|
-
user_linkedin_url: Optional[str] = None, #
|
|
384
|
+
user_linkedin_url: Optional[str] = None, # unused
|
|
283
385
|
middle_name: Optional[str] = None,
|
|
284
|
-
tool_config: Optional[List[Dict]] = None
|
|
386
|
+
tool_config: Optional[List[Dict]] = None,
|
|
285
387
|
) -> Dict[str, Any]:
|
|
286
|
-
""
|
|
287
|
-
Attempt to guess the email using ZeroBounce's guessFormat endpoint, with caching.
|
|
288
|
-
We assume the API returns an "email" and an "email_confidence" field.
|
|
289
|
-
"""
|
|
290
|
-
logger.info("Entering guess_email_with_zero_bounce.")
|
|
388
|
+
logger.info("Entering guess_email_with_zero_bounce")
|
|
291
389
|
if not first_name or not last_name or not domain:
|
|
292
|
-
logger.error("Required parameters first_name, last_name, and domain must be provided.")
|
|
293
390
|
return {"email": "", "email_confidence": "low"}
|
|
294
391
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
if cached_response is not None:
|
|
298
|
-
logger.info("Cache hit for ZeroBounce guess.")
|
|
299
|
-
return json.loads(cached_response[0]) if cached_response else {"email": "", "email_confidence": "low"}
|
|
300
|
-
|
|
301
|
-
ZERO_BOUNCE_API_KEY = get_zero_bounce_access_token(tool_config)
|
|
302
|
-
if not ZERO_BOUNCE_API_KEY:
|
|
303
|
-
logger.warning("No ZeroBounce API key available. Returning low confidence guess.")
|
|
392
|
+
api_key = get_zero_bounce_access_token(tool_config)
|
|
393
|
+
if not api_key:
|
|
304
394
|
return {"email": "", "email_confidence": "low"}
|
|
305
395
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
396
|
+
cache_key = f"zerobounce:guess:{first_name}_{last_name}_{domain}_{middle_name or ''}"
|
|
397
|
+
cached = retrieve_output("zerobounce_guess", cache_key)
|
|
398
|
+
if cached:
|
|
399
|
+
return json.loads(cached[0])
|
|
400
|
+
|
|
401
|
+
url = (
|
|
402
|
+
"https://api.zerobounce.net/v2/guessformat"
|
|
403
|
+
f"?api_key={api_key}&domain={domain}"
|
|
404
|
+
f"&first_name={first_name}&middle_name={middle_name or ''}&last_name={last_name}"
|
|
313
405
|
)
|
|
314
|
-
url = base_url + query_params
|
|
315
406
|
|
|
316
407
|
try:
|
|
317
408
|
async with aiohttp.ClientSession() as session:
|
|
318
|
-
async with session.get(url) as
|
|
319
|
-
if
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
409
|
+
async with session.get(url) as r:
|
|
410
|
+
if r.status != 200:
|
|
411
|
+
logger.warning("[ZeroBounce] guessformat non‑200: %s", r.status)
|
|
412
|
+
result = {"email": "", "email_confidence": "low"}
|
|
413
|
+
else:
|
|
414
|
+
data = await r.json()
|
|
415
|
+
if "email_confidence" not in data:
|
|
416
|
+
data["email_confidence"] = (
|
|
417
|
+
"high" if data.get("email") else "low"
|
|
418
|
+
)
|
|
419
|
+
result = data
|
|
324
420
|
except Exception as ex:
|
|
325
|
-
logger.
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
# If the API doesn't provide "email_confidence", you can supply a fallback:
|
|
329
|
-
if "email_confidence" not in result:
|
|
330
|
-
result["email_confidence"] = "medium" if result.get("email") else "low"
|
|
421
|
+
logger.exception("[ZeroBounce] guess exception: %s", ex)
|
|
422
|
+
result = {"email": "", "email_confidence": "low"}
|
|
331
423
|
|
|
332
424
|
cache_output("zerobounce_guess", cache_key, [json.dumps(result)])
|
|
333
|
-
logger.info("Exiting guess_email_with_zero_bounce.")
|
|
334
425
|
return result
|
|
335
426
|
|
|
336
427
|
|
|
428
|
+
# ───── Hunter guess ────────────────────────────────────────────────────────
|
|
429
|
+
|
|
430
|
+
|
|
337
431
|
@assistant_tool
|
|
338
432
|
async def guess_email_with_hunter(
|
|
339
433
|
first_name: str,
|
|
340
434
|
last_name: str,
|
|
341
435
|
domain: str,
|
|
342
|
-
user_linkedin_url: Optional[str] = None, #
|
|
436
|
+
user_linkedin_url: Optional[str] = None, # unused
|
|
343
437
|
middle_name: Optional[str] = None,
|
|
344
|
-
tool_config: Optional[List[Dict]] = None
|
|
438
|
+
tool_config: Optional[List[Dict]] = None,
|
|
345
439
|
) -> Dict[str, Any]:
|
|
346
|
-
""
|
|
347
|
-
Attempt to guess the email using Hunter.io's email-finder endpoint.
|
|
348
|
-
We'll interpret the "score" (0-100) from the response and map it to "email_confidence".
|
|
349
|
-
"""
|
|
350
|
-
logger.info("Entering guess_email_with_hunter.")
|
|
440
|
+
logger.info("Entering guess_email_with_hunter")
|
|
351
441
|
if not first_name or not last_name or not domain:
|
|
352
|
-
logger.error("Required parameters first_name, last_name, and domain must be provided.")
|
|
353
442
|
return {"email": "", "email_confidence": "low"}
|
|
354
443
|
|
|
355
|
-
|
|
356
|
-
if not
|
|
357
|
-
logger.warning("No Hunter API key available. Returning low-confidence guess.")
|
|
444
|
+
api_key = get_hunter_access_token(tool_config)
|
|
445
|
+
if not api_key:
|
|
358
446
|
return {"email": "", "email_confidence": "low"}
|
|
359
447
|
|
|
360
448
|
url = (
|
|
361
449
|
"https://api.hunter.io/v2/email-finder"
|
|
362
|
-
f"?domain={domain}"
|
|
363
|
-
f"&
|
|
364
|
-
f"&last_name={last_name}"
|
|
365
|
-
f"&api_key={HUNTER_API_KEY}"
|
|
450
|
+
f"?domain={domain}&first_name={first_name}&last_name={last_name}"
|
|
451
|
+
f"&api_key={api_key}"
|
|
366
452
|
)
|
|
367
|
-
# If needed, you could pass middle_name, e.g. "&middle_name={middle_name}"
|
|
368
453
|
|
|
369
454
|
try:
|
|
370
455
|
async with aiohttp.ClientSession() as session:
|
|
371
|
-
async with session.get(url) as
|
|
372
|
-
if
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
456
|
+
async with session.get(url) as r:
|
|
457
|
+
if r.status != 200:
|
|
458
|
+
logger.warning("[Hunter] email-finder non‑200: %s", r.status)
|
|
459
|
+
result = {"email": "", "email_confidence": "low"}
|
|
460
|
+
else:
|
|
461
|
+
data = await r.json()
|
|
462
|
+
email = data.get("data", {}).get("email", "")
|
|
463
|
+
score = float(data.get("data", {}).get("score", 0) or 0)
|
|
464
|
+
if score >= 80:
|
|
465
|
+
conf = "high"
|
|
466
|
+
elif score >= 50:
|
|
467
|
+
conf = "medium"
|
|
468
|
+
else:
|
|
469
|
+
conf = "low"
|
|
470
|
+
result = {"email": email, "email_confidence": conf}
|
|
378
471
|
except Exception as ex:
|
|
379
|
-
logger.
|
|
380
|
-
|
|
472
|
+
logger.exception("[Hunter] guess exception: %s", ex)
|
|
473
|
+
result = {"email": "", "email_confidence": "low"}
|
|
381
474
|
|
|
382
|
-
|
|
383
|
-
found_email = data.get("email", "")
|
|
475
|
+
return result
|
|
384
476
|
|
|
385
|
-
# Safely parse numeric score
|
|
386
|
-
raw_score = data.get("score") # might be int, float, None, or not present
|
|
387
|
-
try:
|
|
388
|
-
score = float(raw_score) if raw_score is not None else 0.0
|
|
389
|
-
except (ValueError, TypeError):
|
|
390
|
-
score = 0.0
|
|
391
|
-
|
|
392
|
-
if score >= 80:
|
|
393
|
-
confidence = "high"
|
|
394
|
-
elif score >= 50:
|
|
395
|
-
confidence = "medium"
|
|
396
|
-
else:
|
|
397
|
-
confidence = "low"
|
|
398
477
|
|
|
399
|
-
|
|
400
|
-
"email": found_email,
|
|
401
|
-
"email_confidence": confidence
|
|
402
|
-
}
|
|
403
|
-
logger.info("Exiting guess_email_with_hunter.")
|
|
404
|
-
return output
|
|
478
|
+
# ───── Apollo guess (fallback) ─────────────────────────────────────────────
|
|
405
479
|
|
|
406
480
|
|
|
407
481
|
@assistant_tool
|
|
@@ -411,115 +485,87 @@ async def guess_email_with_apollo(
|
|
|
411
485
|
domain: str,
|
|
412
486
|
user_linkedin_url: Optional[str] = None,
|
|
413
487
|
middle_name: Optional[str] = None,
|
|
414
|
-
tool_config: Optional[List[Dict]] = None
|
|
488
|
+
tool_config: Optional[List[Dict]] = None,
|
|
415
489
|
) -> Dict[str, Any]:
|
|
416
|
-
""
|
|
417
|
-
Attempt to guess/enrich an email using Apollo, then re-check with ZeroBounce
|
|
418
|
-
to ensure we have acceptable confidence.
|
|
419
|
-
"""
|
|
420
|
-
logger.info("Entering guess_email_with_apollo.")
|
|
490
|
+
logger.info("Entering guess_email_with_apollo")
|
|
421
491
|
if not first_name or not last_name or not domain:
|
|
422
|
-
logger.error("Required parameters first_name, last_name, and domain must be provided.")
|
|
423
492
|
return {"email": "", "email_confidence": "low"}
|
|
424
493
|
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
494
|
+
apollo_cfg = next(
|
|
495
|
+
(item for item in tool_config or [] if item.get("name") == "apollo"), None
|
|
496
|
+
)
|
|
497
|
+
if not apollo_cfg:
|
|
429
498
|
return {"email": "", "email_confidence": "low"}
|
|
430
499
|
|
|
431
|
-
|
|
500
|
+
input_lead = {
|
|
432
501
|
"first_name": first_name,
|
|
433
502
|
"last_name": last_name,
|
|
434
503
|
"primary_domain_of_organization": domain,
|
|
435
|
-
"user_linkedin_url": user_linkedin_url or ""
|
|
504
|
+
"user_linkedin_url": user_linkedin_url or "",
|
|
436
505
|
}
|
|
437
506
|
|
|
438
507
|
try:
|
|
439
|
-
|
|
440
|
-
response = await enrich_user_info_with_apollo(input_lead_info, tool_config)
|
|
508
|
+
enriched = await enrich_user_info_with_apollo(input_lead, tool_config)
|
|
441
509
|
except Exception as ex:
|
|
442
|
-
logger.
|
|
443
|
-
|
|
510
|
+
logger.exception("[Apollo] enrich exception: %s", ex)
|
|
511
|
+
enriched = {}
|
|
444
512
|
|
|
445
|
-
apollo_email =
|
|
513
|
+
apollo_email = enriched.get("email", "")
|
|
446
514
|
if not apollo_email:
|
|
447
|
-
# No email found
|
|
448
515
|
return {"email": "", "email_confidence": "low"}
|
|
449
516
|
|
|
450
|
-
#
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
return {
|
|
456
|
-
"email": apollo_email,
|
|
457
|
-
"email_confidence": zb_conf
|
|
458
|
-
}
|
|
459
|
-
else:
|
|
460
|
-
return {
|
|
461
|
-
"email": apollo_email,
|
|
462
|
-
"email_confidence": "low"
|
|
463
|
-
}
|
|
517
|
+
# quick re‑check with Hunter
|
|
518
|
+
validation = await check_email_validity_with_hunter(apollo_email, tool_config)
|
|
519
|
+
conf = validation.get("confidence", "low")
|
|
520
|
+
return {"email": apollo_email, "email_confidence": conf}
|
|
521
|
+
|
|
464
522
|
|
|
523
|
+
# ─── Provider map
|
|
465
524
|
GUESS_EMAIL_TOOL_MAP = {
|
|
466
|
-
"
|
|
525
|
+
"findymail": guess_email_with_findymail,
|
|
467
526
|
"hunter": guess_email_with_hunter,
|
|
527
|
+
"zerobounce": guess_email_with_zero_bounce,
|
|
468
528
|
"apollo": guess_email_with_apollo,
|
|
469
529
|
}
|
|
470
530
|
|
|
531
|
+
# ===========================================================================
|
|
532
|
+
# 4. AGGREGATORS
|
|
533
|
+
# ===========================================================================
|
|
471
534
|
|
|
472
|
-
# --------------------------------------------------------------------------------
|
|
473
|
-
# 4. Aggregators (High-Level Validation + Guess)
|
|
474
|
-
# --------------------------------------------------------------------------------
|
|
475
535
|
|
|
476
536
|
@assistant_tool
|
|
477
537
|
async def check_email_validity(
|
|
478
538
|
email_id: str,
|
|
479
|
-
tool_config: Optional[List[Dict]] = None
|
|
539
|
+
tool_config: Optional[List[Dict]] = None,
|
|
480
540
|
) -> Dict[str, Any]:
|
|
481
541
|
"""
|
|
482
|
-
Validate
|
|
483
|
-
|
|
484
|
-
2) Hunter
|
|
485
|
-
If a provider returns high confidence, we stop. Otherwise we continue.
|
|
542
|
+
Validate by provider priority:
|
|
543
|
+
1) Findymail 2) Hunter 3) ZeroBounce
|
|
486
544
|
"""
|
|
487
|
-
logger.info("Entering check_email_validity
|
|
545
|
+
logger.info("Entering check_email_validity")
|
|
488
546
|
if not tool_config:
|
|
489
|
-
|
|
490
|
-
return {
|
|
491
|
-
"email": email_id,
|
|
492
|
-
"confidence": "low",
|
|
493
|
-
"is_valid": False
|
|
494
|
-
}
|
|
547
|
+
return {"email": email_id, "confidence": "low", "is_valid": False}
|
|
495
548
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
priority = ["zerobounce", "hunter"]
|
|
549
|
+
names = [c.get("name") for c in tool_config if c.get("name")]
|
|
550
|
+
priority = ["findymail", "hunter", "zerobounce"]
|
|
499
551
|
|
|
500
|
-
|
|
501
|
-
"email": email_id,
|
|
502
|
-
"confidence": "low",
|
|
503
|
-
"is_valid": False
|
|
504
|
-
}
|
|
552
|
+
result: Dict[str, Any] = {"email": email_id, "confidence": "low", "is_valid": False}
|
|
505
553
|
|
|
506
554
|
for provider in priority:
|
|
507
|
-
if provider in
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
555
|
+
if provider not in names:
|
|
556
|
+
continue
|
|
557
|
+
if provider == "findymail":
|
|
558
|
+
result = await check_email_validity_with_findymail(email_id, tool_config)
|
|
559
|
+
elif provider == "hunter":
|
|
560
|
+
result = await check_email_validity_with_hunter(email_id, tool_config)
|
|
561
|
+
else:
|
|
562
|
+
result = await check_email_validity_with_zero_bounce(email_id, tool_config)
|
|
514
563
|
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
if result["confidence"] == "high":
|
|
518
|
-
logger.info(f"{provider} gave high confidence. Stopping further checks.")
|
|
519
|
-
break
|
|
564
|
+
if result["confidence"] in ("high", "low"):
|
|
565
|
+
break
|
|
520
566
|
|
|
521
|
-
logger.info("Exiting check_email_validity
|
|
522
|
-
return
|
|
567
|
+
logger.info("Exiting check_email_validity with %s", result)
|
|
568
|
+
return result
|
|
523
569
|
|
|
524
570
|
|
|
525
571
|
@assistant_tool
|
|
@@ -529,191 +575,143 @@ async def guess_email(
|
|
|
529
575
|
domain: str,
|
|
530
576
|
middle_name: Optional[str] = None,
|
|
531
577
|
user_linkedin_url: Optional[str] = None,
|
|
532
|
-
tool_config: Optional[List[Dict]] = None
|
|
578
|
+
tool_config: Optional[List[Dict]] = None,
|
|
533
579
|
) -> Dict[str, Any]:
|
|
534
580
|
"""
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
1) Hunter
|
|
538
|
-
2) ZeroBounce
|
|
539
|
-
3) Apollo (last resort, re-check with ZeroBounce)
|
|
540
|
-
If the first guess is "high" confidence, we stop. Otherwise move on, etc.
|
|
581
|
+
Guess by provider priority:
|
|
582
|
+
1) Findymail 2) Hunter 3) ZeroBounce 4) Apollo
|
|
541
583
|
"""
|
|
542
|
-
logger.info("Entering guess_email
|
|
584
|
+
logger.info("Entering guess_email")
|
|
543
585
|
if not tool_config:
|
|
544
|
-
logger.warning("No tool configuration found; returning low-confidence guess.")
|
|
545
586
|
return {"email": "", "email_confidence": "low"}
|
|
546
587
|
|
|
547
|
-
|
|
548
|
-
priority = ["zerobounce",
|
|
549
|
-
|
|
588
|
+
names = [c.get("name") for c in tool_config if c.get("name")]
|
|
589
|
+
priority = ["findymail", "hunter", "zerobounce", "apollo"]
|
|
590
|
+
|
|
591
|
+
result: Dict[str, Any] = {"email": "", "email_confidence": "low"}
|
|
550
592
|
|
|
551
593
|
for provider in priority:
|
|
552
|
-
if provider in
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
# --------------------------------------------------------------------------------
|
|
576
|
-
# 5. Orchestrating everything in a single function
|
|
577
|
-
# --------------------------------------------------------------------------------
|
|
594
|
+
if provider not in names:
|
|
595
|
+
continue
|
|
596
|
+
guess_fn = GUESS_EMAIL_TOOL_MAP[provider]
|
|
597
|
+
result = await guess_fn(
|
|
598
|
+
first_name,
|
|
599
|
+
last_name,
|
|
600
|
+
domain,
|
|
601
|
+
user_linkedin_url,
|
|
602
|
+
middle_name,
|
|
603
|
+
tool_config,
|
|
604
|
+
)
|
|
605
|
+
if result.get("email_confidence") == "high":
|
|
606
|
+
break
|
|
607
|
+
|
|
608
|
+
logger.info("Exiting guess_email with %s", result)
|
|
609
|
+
return result
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
# ===========================================================================
|
|
613
|
+
# 5. PROCESS EMAIL PROPERTIES (unchanged except provider names usable)
|
|
614
|
+
# ===========================================================================
|
|
615
|
+
|
|
578
616
|
|
|
579
617
|
@assistant_tool
|
|
580
618
|
async def process_email_properties(
|
|
581
619
|
input_properties: Dict[str, Any],
|
|
582
|
-
tool_config: Optional[List[Dict]] = None
|
|
620
|
+
tool_config: Optional[List[Dict]] = None,
|
|
583
621
|
) -> Dict[str, Any]:
|
|
584
|
-
|
|
622
|
+
"""Central orchestrator used elsewhere in Dhisana."""
|
|
623
|
+
logger.info("Entering process_email_properties")
|
|
585
624
|
|
|
586
625
|
first_name = input_properties.get("first_name", "")
|
|
587
626
|
last_name = input_properties.get("last_name", "")
|
|
588
|
-
email = input_properties.get("email", "")
|
|
589
|
-
email = validate_and_clean_email(email)
|
|
627
|
+
email = validate_and_clean_email(input_properties.get("email", ""))
|
|
590
628
|
additional_properties = input_properties.get("additional_properties", {})
|
|
591
629
|
user_linkedin_url = input_properties.get("user_linkedin_url", "")
|
|
592
630
|
domain = input_properties.get("primary_domain_of_organization", "")
|
|
593
631
|
|
|
594
632
|
if email:
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
is_valid = val_result.get("is_valid", False)
|
|
598
|
-
confidence = val_result.get("confidence", "").lower() # e.g. 'high', 'medium', 'low'
|
|
599
|
-
|
|
600
|
-
if is_valid and confidence == "high":
|
|
601
|
-
# Already good
|
|
633
|
+
val = await check_email_validity(email, tool_config)
|
|
634
|
+
if val["is_valid"] and val["confidence"] == "high":
|
|
602
635
|
input_properties["email_validation_status"] = "valid"
|
|
603
636
|
else:
|
|
604
|
-
# Invalid or medium/low -> mark invalid
|
|
605
637
|
input_properties["email_validation_status"] = "invalid"
|
|
606
|
-
|
|
607
638
|
else:
|
|
608
|
-
# No existing email -> must guess if domain is present
|
|
609
639
|
if not domain:
|
|
610
|
-
logger.info("No primary domain found; cannot guess.")
|
|
611
|
-
additional_properties["guessed_email"] = ""
|
|
612
|
-
input_properties["email"] = ""
|
|
613
640
|
input_properties["email_validation_status"] = "invalid"
|
|
641
|
+
input_properties["email"] = ""
|
|
614
642
|
else:
|
|
615
|
-
#
|
|
643
|
+
# Try HubSpot lookup first (disabled by default)
|
|
616
644
|
hubspot_lead_info = None
|
|
617
|
-
#TODO: test more and enable
|
|
618
645
|
# hubspot_lead_info = await lookup_contact_by_name_and_domain(
|
|
619
|
-
# first_name,
|
|
620
|
-
# last_name,
|
|
621
|
-
# domain,
|
|
622
|
-
# tool_config=tool_config
|
|
646
|
+
# first_name, last_name, domain, tool_config=tool_config
|
|
623
647
|
# )
|
|
624
648
|
if (
|
|
625
649
|
hubspot_lead_info
|
|
626
650
|
and isinstance(hubspot_lead_info, HubSpotLeadInformation)
|
|
627
651
|
and hubspot_lead_info.email
|
|
628
652
|
):
|
|
629
|
-
# We found a HubSpot email; validate it
|
|
630
653
|
hubspot_email = hubspot_lead_info.email
|
|
631
|
-
|
|
632
|
-
is_valid
|
|
633
|
-
confidence = val_result.get("confidence", "").lower()
|
|
634
|
-
input_properties["email"] = hubspot_email
|
|
635
|
-
if is_valid and confidence == "high":
|
|
636
|
-
# Accept HubSpot email
|
|
654
|
+
val = await check_email_validity(hubspot_email, tool_config)
|
|
655
|
+
if val["is_valid"] and val["confidence"] == "high":
|
|
637
656
|
input_properties["email"] = hubspot_email
|
|
638
657
|
input_properties["email_validation_status"] = "valid"
|
|
639
658
|
else:
|
|
640
|
-
|
|
641
|
-
guessed_result = await guess_email(
|
|
659
|
+
g = await guess_email(
|
|
642
660
|
first_name,
|
|
643
661
|
last_name,
|
|
644
662
|
domain,
|
|
645
663
|
"",
|
|
646
664
|
user_linkedin_url,
|
|
647
|
-
tool_config
|
|
665
|
+
tool_config,
|
|
648
666
|
)
|
|
649
|
-
if is_guess_usable(
|
|
650
|
-
|
|
651
|
-
|
|
667
|
+
if is_guess_usable(g):
|
|
668
|
+
input_properties["email"] = g["email"]
|
|
669
|
+
if g["email_confidence"] == "high":
|
|
652
670
|
input_properties["email_validation_status"] = "valid"
|
|
653
671
|
else:
|
|
654
|
-
additional_properties["guessed_email"] = guessed_result.get("email", "")
|
|
655
|
-
input_properties["email"] = guessed_result.get("email", "")
|
|
656
672
|
input_properties["email_validation_status"] = "invalid"
|
|
657
|
-
|
|
658
|
-
additional_properties["guessed_email"] = guessed_result.get("email", "")
|
|
659
|
-
input_properties["email"] = guessed_result.get("email", "")
|
|
660
|
-
input_properties["email_validation_status"] = "invalid"
|
|
673
|
+
additional_properties["guessed_email"] = g["email"]
|
|
661
674
|
else:
|
|
662
|
-
|
|
663
|
-
guessed_result = await guess_email(
|
|
675
|
+
g = await guess_email(
|
|
664
676
|
first_name,
|
|
665
677
|
last_name,
|
|
666
678
|
domain,
|
|
667
679
|
"",
|
|
668
680
|
user_linkedin_url,
|
|
669
|
-
tool_config
|
|
681
|
+
tool_config,
|
|
670
682
|
)
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
input_properties["email_validation_status"] = "valid"
|
|
675
|
-
else:
|
|
676
|
-
additional_properties["guessed_email"] = guessed_result.get("email", "")
|
|
677
|
-
input_properties["email"] = guessed_result["email"]
|
|
678
|
-
input_properties["email_validation_status"] = "invalid"
|
|
683
|
+
input_properties["email"] = g["email"]
|
|
684
|
+
if is_guess_usable(g) and g["email_confidence"] == "high":
|
|
685
|
+
input_properties["email_validation_status"] = "valid"
|
|
679
686
|
else:
|
|
680
|
-
additional_properties["guessed_email"] = guessed_result.get("email", "")
|
|
681
|
-
input_properties["email"] = guessed_result.get("email", "")
|
|
682
687
|
input_properties["email_validation_status"] = "invalid"
|
|
683
|
-
|
|
688
|
+
additional_properties["guessed_email"] = g["email"]
|
|
684
689
|
|
|
685
690
|
input_properties["additional_properties"] = additional_properties
|
|
686
|
-
logger.info("Exiting process_email_properties
|
|
691
|
+
logger.info("Exiting process_email_properties")
|
|
687
692
|
return input_properties
|
|
688
693
|
|
|
689
|
-
|
|
690
|
-
#
|
|
691
|
-
#
|
|
694
|
+
|
|
695
|
+
# ===========================================================================
|
|
696
|
+
# 6. HELPER FUNCTIONS
|
|
697
|
+
# ===========================================================================
|
|
698
|
+
|
|
692
699
|
|
|
693
700
|
async def safe_read_json_or_text(response: aiohttp.ClientResponse) -> Any:
|
|
694
|
-
"""
|
|
695
|
-
Safely attempts to parse an aiohttp response as JSON, else returns text.
|
|
696
|
-
"""
|
|
701
|
+
"""Attempt JSON parsing; fallback to text."""
|
|
697
702
|
try:
|
|
698
703
|
return await response.json()
|
|
699
|
-
except Exception:
|
|
704
|
+
except Exception: # noqa: BLE001
|
|
700
705
|
return await response.text()
|
|
701
706
|
|
|
702
707
|
|
|
703
708
|
def extract_domain(email: str) -> str:
|
|
704
|
-
"""
|
|
705
|
-
if "@"
|
|
706
|
-
return ""
|
|
707
|
-
return email.split("@")[-1].strip()
|
|
709
|
+
"""user@domain.com → domain.com"""
|
|
710
|
+
return email.split("@")[-1].strip() if "@" in email else ""
|
|
708
711
|
|
|
709
712
|
|
|
710
713
|
def is_guess_usable(guess_result: Dict[str, Any]) -> bool:
|
|
711
|
-
"""
|
|
712
|
-
Decide if a guessed email is "usable".
|
|
713
|
-
Here we treat "high" or "medium" as usable.
|
|
714
|
-
Adjust as needed.
|
|
715
|
-
"""
|
|
714
|
+
"""Treat high/medium as usable."""
|
|
716
715
|
if not guess_result:
|
|
717
716
|
return False
|
|
718
|
-
|
|
719
|
-
return email_confidence in ["high", "medium"]
|
|
717
|
+
return guess_result.get("email_confidence", "").lower() in ("high", "medium")
|