dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +33 -0
- dhisana/schemas/sales.py +224 -23
- dhisana/utils/add_mapping.py +72 -63
- dhisana/utils/apollo_tools.py +739 -109
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/cache_output_tools.py +23 -23
- dhisana/utils/check_email_validity_tools.py +456 -458
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +3 -1
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +585 -85
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +69 -16
- dhisana/utils/generate_email_response.py +298 -41
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +19 -6
- dhisana/utils/generate_linkedin_response_message.py +156 -65
- dhisana/utils/generate_structured_output_internal.py +351 -131
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +391 -25
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +771 -167
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +19 -16
- dhisana/utils/parse_linkedin_messages_txt.py +2 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +507 -206
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +121 -68
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +363 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +576 -0
- dhisana/utils/test_connect.py +1765 -92
- dhisana/utils/trasform_json.py +95 -16
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxycurl_search_leads.py
ADDED
@@ -0,0 +1,426 @@
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+import aiohttp
+from pydantic import BaseModel
+
+from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+from dhisana.utils.proxy_curl_tools import (
+    get_proxycurl_access_token,
+    fill_in_missing_properties,
+    transform_company_data,
+)
+from dhisana.utils.cache_output_tools import cache_output
+from urllib.parse import urlparse, urlunparse
+from dhisana.utils.clean_properties import cleanup_properties
+from dhisana.utils.assistant_tool_tag import assistant_tool
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+# ────────────────────────────
+# 🛠 Small generic helpers
+# ────────────────────────────
+def _remove_empty_values(d: Dict[str, Any]) -> Dict[str, Any]:
+    """Return a copy of *d* without keys whose value is empty, None, or zero for integers."""
+    cleaned = {}
+    for k, v in d.items():
+        # Skip None values
+        if v is None:
+            continue
+        # Skip empty strings or whitespace-only strings
+        elif isinstance(v, str) and v.strip() == "":
+            continue
+        # Skip empty lists/arrays
+        elif isinstance(v, list) and len(v) == 0:
+            continue
+        # Skip zero values for integer fields (assuming they're not meaningful for search)
+        elif isinstance(v, int) and v == 0:
+            continue
+        # Keep all other values
+        else:
+            cleaned[k] = v
+    return cleaned
+
+
+def _build_common_params(
+    search_params: BaseModel,
+    max_entries: int,
+    enrich_profiles: bool,
+) -> Dict[str, Any]:
+    """Convert a Pydantic model into Proxycurl query params, removing empty/None values."""
+    params = search_params.model_dump(exclude_none=True)
+    params = _remove_empty_values(params)
+
+    params["page_size"] = max_entries if max_entries > 0 else 5
+    params["enrich_profiles"] = "enrich" if enrich_profiles else "skip"
+    params["use_cache"] = "if-present"
+    return params
+
+
+# ────────────────────────────
+# 📄 Search parameter schemas
+# ────────────────────────────
+class PeopleSearchParams(BaseModel):
+    current_role_title: Optional[str] = None
+    current_company_industry: Optional[str] = None
+    current_company_employee_count_min: Optional[int] = None
+    current_company_employee_count_max: Optional[int] = None
+    country: Optional[str] = None
+    region: Optional[str] = None
+    city: Optional[str] = None
+    summary: Optional[str] = None
+    current_job_description: Optional[str] = None
+    past_job_description: Optional[str] = None
+
+
+class CompanySearchParams(BaseModel):
+    country: Optional[str] = None
+    region: Optional[str] = None
+    city: Optional[str] = None
+    type: Optional[str] = None
+    follower_count_min: Optional[int] = None
+    follower_count_max: Optional[int] = None
+    name: Optional[str] = None
+    industry: Optional[str] = None
+    employee_count_max: Optional[int] = None
+    employee_count_min: Optional[int] = None
+    description: Optional[str] = None
+    founded_after_year: Optional[int] = None
+    founded_before_year: Optional[int] = None
+    funding_amount_max: Optional[int] = None
+    funding_amount_min: Optional[int] = None
+    funding_raised_after: Optional[str] = None
+    funding_raised_before: Optional[str] = None
+    public_identifier_in_list: Optional[str] = None
+    public_identifier_not_in_list: Optional[str] = None
+
+
+class JobSearchParams(BaseModel):
+    job_type: Optional[str] = None
+    experience_level: Optional[str] = None
+    when: Optional[str] = None
+    flexibility: Optional[str] = None
+    geo_id: Optional[int] = None
+    keyword: Optional[str] = None
+    search_id: Optional[str] = None
+
+
+# ────────────────────────────
+# 👤 People search
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_people_search_leads(
+    search_params: PeopleSearchParams,
+    max_entries: int = 5,
+    enrich_profiles: bool = False,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """Search for leads on Proxycurl based on a plain‑English ICP description."""
+
+    params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/search/person"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl search error %s", resp.status)
+                    return []
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl search: %s", exc)
+        return []
+
+    leads: List[Dict[str, Any]] = []
+    for item in (data.get("results") or [])[:max_entries]:
+        lead: Dict[str, Any] = {
+            "user_linkedin_url": item.get("linkedin_profile_url"),
+        }
+        profile = item.get("profile") or {}
+        if profile:
+            # Fill lead fields using profile data
+            lead = fill_in_missing_properties(lead, profile)
+            first_exp = (profile.get("experiences") or [{}])[0]
+            lead.setdefault("organization_name", first_exp.get("company", ""))
+            lead.setdefault(
+                "organization_linkedin_url",
+                first_exp.get("company_linkedin_profile_url", ""),
+            )
+
+            additional_props = lead.get("additional_properties") or {}
+            additional_props["pc_person_data"] = json.dumps(
+                cleanup_properties(profile)
+            )
+            lead["additional_properties"] = additional_props
+
+            linkedin_url = lead.get("user_linkedin_url")
+            if linkedin_url:
+                cache_output(
+                    "enrich_person_info_from_proxycurl", linkedin_url, profile
+                )
+
+        if cleaned := cleanup_properties(lead):
+            leads.append(cleaned)
+
+    return leads
+
+
+# ────────────────────────────
+# 🏢 Company search
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_company_search_leads(
+    search_params: CompanySearchParams,
+    max_entries: int = 5,
+    enrich_profiles: bool = False,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """Search for companies on Proxycurl based on given parameters."""
+
+    params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/search/company"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl company search error %s", resp.status)
+                    return []
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl company search: %s", exc)
+        return []
+
+    companies: List[Dict[str, Any]] = []
+    for item in (data.get("results") or [])[:max_entries]:
+        company: Dict[str, Any] = {
+            "organization_linkedin_url": item.get("linkedin_profile_url"),
+        }
+        profile = item.get("profile") or {}
+        if profile:
+            # Copy mapped properties from the enriched profile
+            transformed = transform_company_data(profile)
+            company.update(transformed)
+
+            # Store the raw profile JSON for reference
+            additional_props = company.get("additional_properties") or {}
+            additional_props["pc_company_data"] = json.dumps(
+                cleanup_properties(profile)
+            )
+            company["additional_properties"] = additional_props
+
+            linkedin_url = company.get("organization_linkedin_url") or ""
+            if linkedin_url and "linkedin.com/company" in linkedin_url:
+                parsed_url = urlparse(linkedin_url)
+                if parsed_url.netloc != "www.linkedin.com":
+                    standardized_netloc = "www.linkedin.com"
+                    standardized_path = parsed_url.path
+                    if not standardized_path.startswith("/company/"):
+                        standardized_path = "/company" + standardized_path
+                    standardized_url = urlunparse(
+                        parsed_url._replace(
+                            netloc=standardized_netloc,
+                            path=standardized_path,
+                        )
+                    )
+                else:
+                    standardized_url = linkedin_url
+                if standardized_url and not standardized_url.endswith("/"):
+                    standardized_url += "/"
+                cache_output(
+                    "enrich_organization_info_from_proxycurl",
+                    standardized_url,
+                    transformed,
+                )
+
+        if cleaned := cleanup_properties(company):
+            companies.append(cleaned)
+
+    return companies
+
+
+# ────────────────────────────
+# 💼 Job search
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_job_search(
+    search_params: JobSearchParams,
+    max_entries: int = 5,
+    enrich_profiles: bool = False,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> List[Dict[str, Any]]:
+    """List jobs posted by a company using Proxycurl's job search API."""
+
+    # Job search endpoint does not support enrich_profiles
+    params = _build_common_params(search_params, max_entries, enrich_profiles=enrich_profiles)
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return []
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/company/job"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl job search error %s", resp.status)
+                    return []
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl job search: %s", exc)
+        return []
+
+    job_entries: List[Dict[str, Any]] = []
+    for item in (data.get("job") or data.get("jobs") or [])[:max_entries]:
+        job: Dict[str, Any] = {
+            "organization_name": item.get("company"),
+            "organization_linkedin_url": item.get("company_url"),
+            "job_title": item.get("job_title"),
+            "job_posting_url": item.get("job_url"),
+            "list_date": item.get("list_date"),
+            "location": item.get("location"),
+        }
+        additional_props = job.get("additional_properties") or {}
+        additional_props["pc_job_data"] = json.dumps(item)
+        job["additional_properties"] = additional_props
+
+        job_url = job.get("job_posting_url")
+        if job_url:
+            cache_output("enrich_job_info_from_proxycurl", job_url, item)
+        if cleaned := cleanup_properties(job):
+            job_entries.append(cleaned)
+
+    return job_entries
+
+
+# ────────────────────────────
+# 📊 Job count
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_job_count(
+    search_params: JobSearchParams,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """Get the count of jobs posted by a company using Proxycurl's job count API."""
+
+    # Job count endpoint does not support enrich_profiles or max_entries
+    params = search_params.model_dump(exclude_none=True)
+    params = _remove_empty_values(params)
+
+    # Job count endpoint doesn't need page_size or enrich_profiles
+    if "page_size" in params:
+        del params["page_size"]
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return {"count": 0}
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/company/job/count"
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl job count error %s", resp.status)
+                    return {"count": 0}
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl job count: %s", exc)
+        return {"count": 0}
+
+    return {"count": data.get("count", 0)}
+
+
+# ────────────────────────────
+# 🔍 Company Profile - Get Search ID
+# ────────────────────────────
+@assistant_tool
+async def proxycurl_get_company_search_id(
+    company_url: str,
+    tool_config: Optional[List[Dict[str, Any]]] = None,
+) -> Dict[str, Any]:
+    """Get a company's search ID using Proxycurl's Company Profile endpoint.
+
+    The search_id is required for other Proxycurl endpoints like job search and job count.
+
+    Args:
+        company_url: LinkedIn company profile URL (e.g., "https://www.linkedin.com/company/microsoft/")
+        tool_config: Optional tool configuration containing API key
+
+    Returns:
+        Dictionary containing search_id and basic company info, or error info if failed
+    """
+
+    try:
+        api_key = get_proxycurl_access_token(tool_config)
+    except ValueError as e:
+        logger.error(str(e))
+        return {"error": str(e), "search_id": None}
+
+    headers = {"Authorization": f"Bearer {api_key}"}
+    url = "https://enrichlayer.com/api/v2/company"
+
+    params = {
+        "url": company_url,
+        "use_cache": "if-present",
+        "fallback_to_cache": "on-error"
+    }
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers, params=params) as resp:
+                if resp.status != 200:
+                    logger.error("Proxycurl company profile error %s", resp.status)
+                    return {"error": f"HTTP {resp.status}", "search_id": None}
+                data = await resp.json()
+    except Exception as exc:
+        logger.exception("Exception during Proxycurl company profile lookup: %s", exc)
+        return {"error": str(exc), "search_id": None}
+
+    # Extract the key information
+    search_id = data.get("search_id")
+    name = data.get("name")
+    linkedin_internal_id = data.get("linkedin_internal_id")
+    industry = data.get("industry")
+
+    result = {
+        "search_id": search_id,
+        "name": name,
+        "linkedin_internal_id": linkedin_internal_id,
+        "industry": industry,
+        "company_url": company_url
+    }
+
+    if search_id:
+        logger.info(f"Successfully retrieved search_id '{search_id}' for company '{name}'")
+    else:
+        logger.warning(f"No search_id found for company at {company_url}")
+        result["error"] = "No search_id found in response"
+
+    return result
dhisana/utils/research_lead.py
CHANGED
@@ -1,86 +1,50 @@
-
-# Write up a research summary about the lead using AI.
-# Use the provided user information, ICP to summarize the research
-
 from typing import Dict, List, Optional
 from pydantic import BaseModel
 from dhisana.utils.assistant_tool_tag import assistant_tool
 from dhisana.utils.clean_properties import cleanup_email_context
-from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
 
-
-
-
-    icp_match_score: int
+def clean_nul_bytes(s: str) -> str:
+    s = s.replace('```markdown', '')
+    return s.replace('\x00', '')
 
-
-async def research_lead_with_icp_ai(user_properties: dict, icp: str, instructions:str, tool_config: Optional[List[Dict]] = None):
+def _remove_excluded_fields(data: Dict) -> Dict:
     """
-
-
-    This function sends an asynchronous request to gather research information about the lead and evaluate how well it matches the Ideal Customer Profile (ICP).
-
-    Parameters:
-    user_properties (dict): Information about the lead.
-    icp (str): The Ideal Customer Profile description.
-    instructions (str): Additional instructions for generating the research summary.
-    tool_config (Optional[dict]): Configuration for the tool (default is None).
-
-    Returns:
-    dict: The JSON response containing the research summary and ICP match score.
-
-    Raises:
-    ValueError: If required parameters are missing.
-    Exception: If there is an error in processing the request.
+    Return a copy of `data` that excludes keys named 'id'
+    or that end in '_by', '_id', '_to', or '_at'.
     """
-
-
-
-
-
-
-
-
-
-
-
-    {icp}
-
-    Custom insturctions for research
-    {instructions}
-
-    The output should be in JSON format with the following structure:
-    {{
-        "research_summary": "Short Summary of the research about lead. Include key insights and findings on how it matches the ICP.This value is neatly formmated Github Markdown.",
-        "icp_match_score": "Score of how well the lead matches the ICP (0-5). 0 no match, 5 perfect match."
-    }}
-    """
-    response, status = await get_structured_output_internal(instructions, LeadResearchInformation, tool_config=tool_config)
-    return response.model_dump()
-
+    excluded_keys = {"id"}
+    excluded_endings = ["_by", "_id", "_to", "_at", "_status", "research_summary"]
+
+    cleaned = {}
+    for k, v in data.items():
+        if k in excluded_keys:
+            continue
+        if any(k.endswith(suffix) for suffix in excluded_endings):
+            continue
+        cleaned[k] = v
+    return cleaned
 
 class LeadResearchInformation(BaseModel):
     research_summary: str
 
-
 @assistant_tool
-async def research_lead_with_full_info_ai(
-
-
+async def research_lead_with_full_info_ai(
+    user_properties: dict,
+    instructions: str,
+    tool_config: Optional[List[Dict]] = None
+):
     """
     Research on lead provided given input. Provide Detailed Summary.
-
-
-
-    tool_config (Optional[dict]): Configuration for the tool (default is None).
+    """
+    # Clean user properties (e.g. remove newlines, sanitize strings, etc.)
+    user_properties = cleanup_email_context(user_properties)
 
-
-
+    # Remove excluded fields from user_properties
+    user_properties = _remove_excluded_fields(user_properties)
 
-
-
-    Exception: If there is an error in processing the request.
-    """
+    # Optionally remove any known keys that should not appear (e.g. 'date_extracted')
+    user_properties.pop("date_extracted", None)
 
     instructions = f"""
     Please read the following user information and instructions, then produce a detailed summary of the lead in the specified format.
@@ -105,6 +69,12 @@ async def research_lead_with_full_info_ai(user_properties: dict, instructions:st
     8. Connections
     9. Current Company Information
     10. Contact Information
+    11. Addtional Info:
+        a. Include any githbub information like handle, repositories owned etc if present.
+        b. Include any twitter information like handle, followers etc if present.
+        c. Includ any youtube channel information like handle, subscribers etc if present.
+        d. Include any other social media information like handle, followers etc if present.
+
 
     - In the **About** section, create a clear, concise description of the lead that can be used for sales prospecting.
    - In the **Current Company Information** section, summarize what the lead’s current company does.
@@ -117,7 +87,90 @@ async def research_lead_with_full_info_ai(user_properties: dict, instructions:st
     {{
         "research_summary": "Detailed summary about lead. The summary should be neatly formatted in GitHub-Flavored Markdown, and include all the key information from the listed sections."
     }}
+    """
+    response, status = await get_structured_output_internal(
+        instructions,
+        LeadResearchInformation,
+        model="gpt-5.1-chat",
+        tool_config=tool_config
+    )
+    if status == "SUCCESS":
+        response.research_summary = clean_nul_bytes(response.research_summary)
+        return response.model_dump()
+    else:
+        return {"research_summary": ""}
+
+# --------------------------------------------
+# COMPANY-RELATED MODELS & FUNCTION (FIXED)
+# --------------------------------------------
+class CompanyResearchInformation(BaseModel):
+    research_summary: str
+
+@assistant_tool
+async def research_company_with_full_info_ai(
+    company_properties: dict,
+    instructions: str,
+    tool_config: Optional[List[Dict]] = None
+):
+    """
+    Research on company provided given input. Provide a Detailed Summary.
+
+    Parameters:
+    company_properties (dict): Information about the company.
+    instructions (str): Additional instructions for generating the detailed summary.
+    tool_config (Optional[List[Dict]]): Configuration for the tool (default is None).
+
+    Returns:
+    dict: The JSON response containing the detailed research summary of the company.
+    """
+    # Clean company properties (e.g. remove newlines, sanitize strings, etc.)
+    company_properties = cleanup_email_context(company_properties)
+
+    # Remove excluded fields from company_properties
+    company_properties = _remove_excluded_fields(company_properties)
+
+    instructions = f"""
+    Please read the following company information and instructions, then produce a detailed summary of the company in the specified format.
+    ---
+    Company Data include name, domain and website:
+    {company_properties}
 
+    Instructions:
+    {instructions}
+    ---
+
+    **Task**:
+    Give a short summary of the company based on the provided data. Include **firmographic details** if they are present.
+    The summary should have the following sections (only include them if there is relevant data):
+
+    1. About Company
+    2. Industry
+    3. Location / HQ
+    4. Employee Headcount
+    5. Revenue
+    6. Funding Information
+    7. Additional Firmographics (e.g. markets, expansions, or any other relevant data)
+
+    - In the **About Company** section, create a clear, concise description of what the company does (suitable for sales prospecting).
+    - Do not include any IDs, userIds, or GUIDs in the output.
+    - Have the above section headers even if section content is empty.
+    Use web search to find additional information about the company using company name and domain. Search what it does, news, and funding.
+
+    **Output**:
+    Return your final output as valid JSON with the following structure:
+    {{
+        "research_summary": "Detailed summary about the company. The summary should be neatly formatted in GitHub-Flavored Markdown, and include all the key information from the listed sections."
+    }}
     """
-    response, status = await get_structured_output_internal(
-
+    response, status = await get_structured_output_internal(
+        instructions,
+        CompanyResearchInformation,
+        model="gpt-5.1-chat",
+        use_web_search=False,
+        tool_config=tool_config
+    )
+    if status == "SUCCESS":
+        response.research_summary = clean_nul_bytes(response.research_summary)
+        return response.model_dump()
+    else:
+        return {"research_summary": ""}
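
As with the search module, here is a minimal sketch of how the reworked lead-research helper might be driven. Only the function signature, the `_remove_excluded_fields` behaviour, and the empty-summary fallback come from the diff; the lead fields shown and the choice of `tool_config=None` are illustrative assumptions.

```python
# Sketch only: the lead fields below are hypothetical examples; keys ending in
# "_by"/"_id" are included to illustrate what _remove_excluded_fields strips out.
import asyncio

from dhisana.utils.research_lead import research_lead_with_full_info_ai

async def main() -> None:
    lead = {
        "full_name": "Jane Doe",
        "job_title": "VP of Sales",
        "organization_name": "Acme Corp",
        "created_by": "importer",   # dropped: ends with "_by"
        "hubspot_id": "12345",      # dropped: ends with "_id"
    }
    result = await research_lead_with_full_info_ai(
        user_properties=lead,
        instructions="Highlight signals relevant to a data-security product.",
        tool_config=None,  # or a list of provider configs, as elsewhere in the package
    )
    # On failure the helper returns {"research_summary": ""} rather than raising.
    print(result["research_summary"])

if __name__ == "__main__":
    asyncio.run(main())
```
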
dhisana/utils/sales_navigator_crawler.py
CHANGED
@@ -3,21 +3,16 @@
 # Executes the tasks and sends the results back to the service.
 
 import asyncio
-from datetime import datetime
+from datetime import datetime
 import json
 import os
 import logging
 import re
 from typing import List, Dict, Any
 import html2text
-from pydantic import BaseModel, Field
 from playwright.async_api import async_playwright, Page
-import pandas as pd
 import requests # or aiohttp if you prefer async calls
 
-from dhisana.utils.assistant_tool_tag import assistant_tool
-from dhisana.utils.dataframe_tools import get_structured_output
-from dhisana.utils.web_download_parse_tools import parse_html_content_as_text
 import asyncio
 import logging
 import pyperclip