dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. dhisana/schemas/common.py +33 -0
  2. dhisana/schemas/sales.py +224 -23
  3. dhisana/utils/add_mapping.py +72 -63
  4. dhisana/utils/apollo_tools.py +739 -109
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/cache_output_tools.py +23 -23
  7. dhisana/utils/check_email_validity_tools.py +456 -458
  8. dhisana/utils/check_for_intent_signal.py +1 -2
  9. dhisana/utils/check_linkedin_url_validity.py +34 -8
  10. dhisana/utils/clay_tools.py +3 -2
  11. dhisana/utils/clean_properties.py +3 -1
  12. dhisana/utils/compose_salesnav_query.py +0 -1
  13. dhisana/utils/compose_search_query.py +7 -3
  14. dhisana/utils/composite_tools.py +0 -1
  15. dhisana/utils/dataframe_tools.py +2 -2
  16. dhisana/utils/email_body_utils.py +72 -0
  17. dhisana/utils/email_provider.py +375 -0
  18. dhisana/utils/enrich_lead_information.py +585 -85
  19. dhisana/utils/fetch_openai_config.py +129 -0
  20. dhisana/utils/field_validators.py +1 -1
  21. dhisana/utils/g2_tools.py +0 -1
  22. dhisana/utils/generate_content.py +0 -1
  23. dhisana/utils/generate_email.py +69 -16
  24. dhisana/utils/generate_email_response.py +298 -41
  25. dhisana/utils/generate_flow.py +0 -1
  26. dhisana/utils/generate_linkedin_connect_message.py +19 -6
  27. dhisana/utils/generate_linkedin_response_message.py +156 -65
  28. dhisana/utils/generate_structured_output_internal.py +351 -131
  29. dhisana/utils/google_custom_search.py +150 -44
  30. dhisana/utils/google_oauth_tools.py +721 -0
  31. dhisana/utils/google_workspace_tools.py +391 -25
  32. dhisana/utils/hubspot_clearbit.py +3 -1
  33. dhisana/utils/hubspot_crm_tools.py +771 -167
  34. dhisana/utils/instantly_tools.py +3 -1
  35. dhisana/utils/lusha_tools.py +10 -7
  36. dhisana/utils/mailgun_tools.py +150 -0
  37. dhisana/utils/microsoft365_tools.py +447 -0
  38. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  39. dhisana/utils/openai_helpers.py +19 -16
  40. dhisana/utils/parse_linkedin_messages_txt.py +2 -3
  41. dhisana/utils/profile.py +37 -0
  42. dhisana/utils/proxy_curl_tools.py +507 -206
  43. dhisana/utils/proxycurl_search_leads.py +426 -0
  44. dhisana/utils/research_lead.py +121 -68
  45. dhisana/utils/sales_navigator_crawler.py +1 -6
  46. dhisana/utils/salesforce_crm_tools.py +323 -50
  47. dhisana/utils/search_router.py +131 -0
  48. dhisana/utils/search_router_jobs.py +51 -0
  49. dhisana/utils/sendgrid_tools.py +126 -91
  50. dhisana/utils/serarch_router_local_business.py +75 -0
  51. dhisana/utils/serpapi_additional_tools.py +290 -0
  52. dhisana/utils/serpapi_google_jobs.py +117 -0
  53. dhisana/utils/serpapi_google_search.py +188 -0
  54. dhisana/utils/serpapi_local_business_search.py +129 -0
  55. dhisana/utils/serpapi_search_tools.py +363 -432
  56. dhisana/utils/serperdev_google_jobs.py +125 -0
  57. dhisana/utils/serperdev_local_business.py +154 -0
  58. dhisana/utils/serperdev_search.py +233 -0
  59. dhisana/utils/smtp_email_tools.py +576 -0
  60. dhisana/utils/test_connect.py +1765 -92
  61. dhisana/utils/trasform_json.py +95 -16
  62. dhisana/utils/web_download_parse_tools.py +0 -1
  63. dhisana/utils/zoominfo_tools.py +2 -3
  64. dhisana/workflow/test.py +1 -1
  65. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
  66. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  67. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  68. dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
  69. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  70. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxycurl_search_leads.py (new file)
@@ -0,0 +1,426 @@
+ import json
+ import logging
+ from typing import Any, Dict, List, Optional
+
+ import aiohttp
+ from pydantic import BaseModel
+
+ from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+ from dhisana.utils.proxy_curl_tools import (
+     get_proxycurl_access_token,
+     fill_in_missing_properties,
+     transform_company_data,
+ )
+ from dhisana.utils.cache_output_tools import cache_output
+ from urllib.parse import urlparse, urlunparse
+ from dhisana.utils.clean_properties import cleanup_properties
+ from dhisana.utils.assistant_tool_tag import assistant_tool
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ # ────────────────────────────
+ # 🛠 Small generic helpers
+ # ────────────────────────────
+ def _remove_empty_values(d: Dict[str, Any]) -> Dict[str, Any]:
+     """Return a copy of *d* without keys whose value is empty, None, or zero for integers."""
+     cleaned = {}
+     for k, v in d.items():
+         # Skip None values
+         if v is None:
+             continue
+         # Skip empty strings or whitespace-only strings
+         elif isinstance(v, str) and v.strip() == "":
+             continue
+         # Skip empty lists/arrays
+         elif isinstance(v, list) and len(v) == 0:
+             continue
+         # Skip zero values for integer fields (assuming they're not meaningful for search)
+         elif isinstance(v, int) and v == 0:
+             continue
+         # Keep all other values
+         else:
+             cleaned[k] = v
+     return cleaned
+
+
+ def _build_common_params(
+     search_params: BaseModel,
+     max_entries: int,
+     enrich_profiles: bool,
+ ) -> Dict[str, Any]:
+     """Convert a Pydantic model into Proxycurl query params, removing empty/None values."""
+     params = search_params.model_dump(exclude_none=True)
+     params = _remove_empty_values(params)
+
+     params["page_size"] = max_entries if max_entries > 0 else 5
+     params["enrich_profiles"] = "enrich" if enrich_profiles else "skip"
+     params["use_cache"] = "if-present"
+     return params
+
+
+ # ────────────────────────────
+ # 📄 Search parameter schemas
+ # ────────────────────────────
+ class PeopleSearchParams(BaseModel):
+     current_role_title: Optional[str] = None
+     current_company_industry: Optional[str] = None
+     current_company_employee_count_min: Optional[int] = None
+     current_company_employee_count_max: Optional[int] = None
+     country: Optional[str] = None
+     region: Optional[str] = None
+     city: Optional[str] = None
+     summary: Optional[str] = None
+     current_job_description: Optional[str] = None
+     past_job_description: Optional[str] = None
+
+
+ class CompanySearchParams(BaseModel):
+     country: Optional[str] = None
+     region: Optional[str] = None
+     city: Optional[str] = None
+     type: Optional[str] = None
+     follower_count_min: Optional[int] = None
+     follower_count_max: Optional[int] = None
+     name: Optional[str] = None
+     industry: Optional[str] = None
+     employee_count_max: Optional[int] = None
+     employee_count_min: Optional[int] = None
+     description: Optional[str] = None
+     founded_after_year: Optional[int] = None
+     founded_before_year: Optional[int] = None
+     funding_amount_max: Optional[int] = None
+     funding_amount_min: Optional[int] = None
+     funding_raised_after: Optional[str] = None
+     funding_raised_before: Optional[str] = None
+     public_identifier_in_list: Optional[str] = None
+     public_identifier_not_in_list: Optional[str] = None
+
+
+ class JobSearchParams(BaseModel):
+     job_type: Optional[str] = None
+     experience_level: Optional[str] = None
+     when: Optional[str] = None
+     flexibility: Optional[str] = None
+     geo_id: Optional[int] = None
+     keyword: Optional[str] = None
+     search_id: Optional[str] = None
+
+
+ # ────────────────────────────
+ # 👤 People search
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_people_search_leads(
+     search_params: PeopleSearchParams,
+     max_entries: int = 5,
+     enrich_profiles: bool = False,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> List[Dict[str, Any]]:
+     """Search for leads on Proxycurl based on a plain‑English ICP description."""
+
+     params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return []
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/search/person"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl search error %s", resp.status)
+                     return []
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl search: %s", exc)
+         return []
+
+     leads: List[Dict[str, Any]] = []
+     for item in (data.get("results") or [])[:max_entries]:
+         lead: Dict[str, Any] = {
+             "user_linkedin_url": item.get("linkedin_profile_url"),
+         }
+         profile = item.get("profile") or {}
+         if profile:
+             # Fill lead fields using profile data
+             lead = fill_in_missing_properties(lead, profile)
+             first_exp = (profile.get("experiences") or [{}])[0]
+             lead.setdefault("organization_name", first_exp.get("company", ""))
+             lead.setdefault(
+                 "organization_linkedin_url",
+                 first_exp.get("company_linkedin_profile_url", ""),
+             )
+
+             additional_props = lead.get("additional_properties") or {}
+             additional_props["pc_person_data"] = json.dumps(
+                 cleanup_properties(profile)
+             )
+             lead["additional_properties"] = additional_props
+
+             linkedin_url = lead.get("user_linkedin_url")
+             if linkedin_url:
+                 cache_output(
+                     "enrich_person_info_from_proxycurl", linkedin_url, profile
+                 )
+
+         if cleaned := cleanup_properties(lead):
+             leads.append(cleaned)
+
+     return leads
+
+
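For orientation (not part of the diff): a minimal sketch of how the new people-search tool might be invoked. The import path follows the new module above; the filter values are illustrative, and `tool_config=None` is only a placeholder — the code requires whatever configuration `get_proxycurl_access_token` needs to resolve an API key.

```python
import asyncio

from dhisana.utils.proxycurl_search_leads import (
    PeopleSearchParams,
    proxycurl_people_search_leads,
)

async def main() -> None:
    # Only non-empty fields end up in the query string: _build_common_params
    # drops None/empty/zero values and adds page_size, enrich_profiles, use_cache.
    params = PeopleSearchParams(
        current_role_title="VP of Engineering",
        current_company_industry="Computer Software",
        country="US",
    )
    leads = await proxycurl_people_search_leads(
        search_params=params,
        max_entries=5,
        enrich_profiles=True,  # sends enrich_profiles="enrich"
        tool_config=None,      # placeholder; must let get_proxycurl_access_token find a key
    )
    for lead in leads:
        print(lead.get("user_linkedin_url"), lead.get("organization_name"))

asyncio.run(main())
```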
+ # ────────────────────────────
+ # 🏢 Company search
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_company_search_leads(
+     search_params: CompanySearchParams,
+     max_entries: int = 5,
+     enrich_profiles: bool = False,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> List[Dict[str, Any]]:
+     """Search for companies on Proxycurl based on given parameters."""
+
+     params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return []
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/search/company"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl company search error %s", resp.status)
+                     return []
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl company search: %s", exc)
+         return []
+
+     companies: List[Dict[str, Any]] = []
+     for item in (data.get("results") or [])[:max_entries]:
+         company: Dict[str, Any] = {
+             "organization_linkedin_url": item.get("linkedin_profile_url"),
+         }
+         profile = item.get("profile") or {}
+         if profile:
+             # Copy mapped properties from the enriched profile
+             transformed = transform_company_data(profile)
+             company.update(transformed)
+
+             # Store the raw profile JSON for reference
+             additional_props = company.get("additional_properties") or {}
+             additional_props["pc_company_data"] = json.dumps(
+                 cleanup_properties(profile)
+             )
+             company["additional_properties"] = additional_props
+
+             linkedin_url = company.get("organization_linkedin_url") or ""
+             if linkedin_url and "linkedin.com/company" in linkedin_url:
+                 parsed_url = urlparse(linkedin_url)
+                 if parsed_url.netloc != "www.linkedin.com":
+                     standardized_netloc = "www.linkedin.com"
+                     standardized_path = parsed_url.path
+                     if not standardized_path.startswith("/company/"):
+                         standardized_path = "/company" + standardized_path
+                     standardized_url = urlunparse(
+                         parsed_url._replace(
+                             netloc=standardized_netloc,
+                             path=standardized_path,
+                         )
+                     )
+                 else:
+                     standardized_url = linkedin_url
+                 if standardized_url and not standardized_url.endswith("/"):
+                     standardized_url += "/"
+                 cache_output(
+                     "enrich_organization_info_from_proxycurl",
+                     standardized_url,
+                     transformed,
+                 )
+
+         if cleaned := cleanup_properties(company):
+             companies.append(cleaned)
+
+     return companies
+
+
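The company-search loop above standardizes LinkedIn company URLs before caching the enriched record. A small standalone restatement of that branch (for illustration only, not part of the diff) shows the effect:

```python
from urllib.parse import urlparse, urlunparse

def standardize_company_url(linkedin_url: str) -> str:
    """Standalone restatement of the URL-normalization branch above."""
    parsed = urlparse(linkedin_url)
    if parsed.netloc != "www.linkedin.com":
        path = parsed.path
        if not path.startswith("/company/"):
            path = "/company" + path
        linkedin_url = urlunparse(parsed._replace(netloc="www.linkedin.com", path=path))
    if linkedin_url and not linkedin_url.endswith("/"):
        linkedin_url += "/"
    return linkedin_url

print(standardize_company_url("https://linkedin.com/company/acme"))
# https://www.linkedin.com/company/acme/
print(standardize_company_url("https://www.linkedin.com/company/acme"))
# https://www.linkedin.com/company/acme/
```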
+ # ────────────────────────────
+ # 💼 Job search
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_job_search(
+     search_params: JobSearchParams,
+     max_entries: int = 5,
+     enrich_profiles: bool = False,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> List[Dict[str, Any]]:
+     """List jobs posted by a company using Proxycurl's job search API."""
+
+     # Job search endpoint does not support enrich_profiles
+     params = _build_common_params(search_params, max_entries, enrich_profiles=enrich_profiles)
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return []
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/company/job"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl job search error %s", resp.status)
+                     return []
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl job search: %s", exc)
+         return []
+
+     job_entries: List[Dict[str, Any]] = []
+     for item in (data.get("job") or data.get("jobs") or [])[:max_entries]:
+         job: Dict[str, Any] = {
+             "organization_name": item.get("company"),
+             "organization_linkedin_url": item.get("company_url"),
+             "job_title": item.get("job_title"),
+             "job_posting_url": item.get("job_url"),
+             "list_date": item.get("list_date"),
+             "location": item.get("location"),
+         }
+         additional_props = job.get("additional_properties") or {}
+         additional_props["pc_job_data"] = json.dumps(item)
+         job["additional_properties"] = additional_props
+
+         job_url = job.get("job_posting_url")
+         if job_url:
+             cache_output("enrich_job_info_from_proxycurl", job_url, item)
+         if cleaned := cleanup_properties(job):
+             job_entries.append(cleaned)
+
+     return job_entries
+
+
+ # ────────────────────────────
+ # 📊 Job count
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_job_count(
+     search_params: JobSearchParams,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> Dict[str, Any]:
+     """Get the count of jobs posted by a company using Proxycurl's job count API."""
+
+     # Job count endpoint does not support enrich_profiles or max_entries
+     params = search_params.model_dump(exclude_none=True)
+     params = _remove_empty_values(params)
+
+     # Job count endpoint doesn't need page_size or enrich_profiles
+     if "page_size" in params:
+         del params["page_size"]
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return {"count": 0}
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/company/job/count"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl job count error %s", resp.status)
+                     return {"count": 0}
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl job count: %s", exc)
+         return {"count": 0}
+
+     return {"count": data.get("count", 0)}
+
+
+ # ────────────────────────────
+ # 🔍 Company Profile - Get Search ID
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_get_company_search_id(
+     company_url: str,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> Dict[str, Any]:
+     """Get a company's search ID using Proxycurl's Company Profile endpoint.
+
+     The search_id is required for other Proxycurl endpoints like job search and job count.
+
+     Args:
+         company_url: LinkedIn company profile URL (e.g., "https://www.linkedin.com/company/microsoft/")
+         tool_config: Optional tool configuration containing API key
+
+     Returns:
+         Dictionary containing search_id and basic company info, or error info if failed
+     """
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return {"error": str(e), "search_id": None}
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/company"
+
+     params = {
+         "url": company_url,
+         "use_cache": "if-present",
+         "fallback_to_cache": "on-error"
+     }
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl company profile error %s", resp.status)
+                     return {"error": f"HTTP {resp.status}", "search_id": None}
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl company profile lookup: %s", exc)
+         return {"error": str(exc), "search_id": None}
+
+     # Extract the key information
+     search_id = data.get("search_id")
+     name = data.get("name")
+     linkedin_internal_id = data.get("linkedin_internal_id")
+     industry = data.get("industry")
+
+     result = {
+         "search_id": search_id,
+         "name": name,
+         "linkedin_internal_id": linkedin_internal_id,
+         "industry": industry,
+         "company_url": company_url
+     }
+
+     if search_id:
+         logger.info(f"Successfully retrieved search_id '{search_id}' for company '{name}'")
+     else:
+         logger.warning(f"No search_id found for company at {company_url}")
+         result["error"] = "No search_id found in response"
+
+     return result
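Putting the job tools together (again, not part of the diff): the docstring notes that the `search_id` returned by the Company Profile endpoint is what the job search and job count endpoints expect, so a caller would typically chain the three tools. The keyword filter and `tool_config=None` below are placeholders.

```python
import asyncio

from dhisana.utils.proxycurl_search_leads import (
    JobSearchParams,
    proxycurl_get_company_search_id,
    proxycurl_job_count,
    proxycurl_job_search,
)

async def open_roles(company_url: str, tool_config=None):
    # 1) Resolve the company's search_id from its LinkedIn URL.
    profile = await proxycurl_get_company_search_id(company_url, tool_config=tool_config)
    search_id = profile.get("search_id")
    if not search_id:
        return {"count": 0, "jobs": []}

    # 2) Reuse that search_id for the job search and job count endpoints.
    job_params = JobSearchParams(search_id=search_id, keyword="engineer")
    count = await proxycurl_job_count(job_params, tool_config=tool_config)
    jobs = await proxycurl_job_search(job_params, max_entries=5, tool_config=tool_config)
    return {"count": count.get("count", 0), "jobs": jobs}

result = asyncio.run(open_roles("https://www.linkedin.com/company/microsoft/"))
print(result["count"], len(result["jobs"]))
```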
dhisana/utils/research_lead.py
@@ -1,86 +1,50 @@
-
- # Write up a research summary about the lead using AI.
- # Use the provided user information, ICP to summarize the research
-
  from typing import Dict, List, Optional
  from pydantic import BaseModel
  from dhisana.utils.assistant_tool_tag import assistant_tool
  from dhisana.utils.clean_properties import cleanup_email_context
- from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+ from dhisana.utils.generate_structured_output_internal import get_structured_output_internal

- # Define a model for lead research information
- class LeadResearchInformation(BaseModel):
-     research_summary: str
-     icp_match_score: int
+ def clean_nul_bytes(s: str) -> str:
+     s = s.replace('```markdown', '')
+     return s.replace('\x00', '')

- @assistant_tool
- async def research_lead_with_icp_ai(user_properties: dict, icp: str, instructions:str, tool_config: Optional[List[Dict]] = None):
+ def _remove_excluded_fields(data: Dict) -> Dict:
      """
-     Research on lead provided given input. Check how much it matches ICP.
-
-     This function sends an asynchronous request to gather research information about the lead and evaluate how well it matches the Ideal Customer Profile (ICP).
-
-     Parameters:
-         user_properties (dict): Information about the lead.
-         icp (str): The Ideal Customer Profile description.
-         instructions (str): Additional instructions for generating the research summary.
-         tool_config (Optional[dict]): Configuration for the tool (default is None).
-
-     Returns:
-         dict: The JSON response containing the research summary and ICP match score.
-
-     Raises:
-         ValueError: If required parameters are missing.
-         Exception: If there is an error in processing the request.
+     Return a copy of `data` that excludes keys named 'id'
+     or that end in '_by', '_id', '_to', or '_at'.
      """
-
-     instructions = f"""
-     Give a deatiled research summary of the lead given the user information input.
-     Make sure all the information about lead including experience, skills, education, etc. are included in the summary.
-     Have individual sections as in linkedin in like experience, education, skills, etc.
-     Have section with summary on what the current company that lead is working at does.
-     Highlight how the lead matches the Ideal Customer Profile (ICP) provided.
-     Research the lead based on the following information:
-     {user_properties}
-
-     Describe how the lead information matches the Ideal Customer Profile (ICP) provided:
-     {icp}
-
-     Custom insturctions for research
-     {instructions}
-
-     The output should be in JSON format with the following structure:
-     {{
-         "research_summary": "Short Summary of the research about lead. Include key insights and findings on how it matches the ICP.This value is neatly formmated Github Markdown.",
-         "icp_match_score": "Score of how well the lead matches the ICP (0-5). 0 no match, 5 perfect match."
-     }}
-     """
-     response, status = await get_structured_output_internal(instructions, LeadResearchInformation, tool_config=tool_config)
-     return response.model_dump()
-
+     excluded_keys = {"id"}
+     excluded_endings = ["_by", "_id", "_to", "_at", "_status", "research_summary"]
+
+     cleaned = {}
+     for k, v in data.items():
+         if k in excluded_keys:
+             continue
+         if any(k.endswith(suffix) for suffix in excluded_endings):
+             continue
+         cleaned[k] = v
+     return cleaned

  class LeadResearchInformation(BaseModel):
      research_summary: str

-
  @assistant_tool
- async def research_lead_with_full_info_ai(user_properties: dict, instructions:str, tool_config: Optional[List[Dict]] = None):
-
-     user_properties = cleanup_email_context(user_properties)
+ async def research_lead_with_full_info_ai(
+     user_properties: dict,
+     instructions: str,
+     tool_config: Optional[List[Dict]] = None
+ ):
      """
      Research on lead provided given input. Provide Detailed Summary.
-     Parameters:
-         user_properties (dict): Information about the lead.
-         instructions (str): Additional instructions for generating the detailed summary.
-         tool_config (Optional[dict]): Configuration for the tool (default is None).
+     """
+     # Clean user properties (e.g. remove newlines, sanitize strings, etc.)
+     user_properties = cleanup_email_context(user_properties)

-     Returns:
-         dict: The JSON response containing the detailed reserach summary of the lead.
+     # Remove excluded fields from user_properties
+     user_properties = _remove_excluded_fields(user_properties)

-     Raises:
-         ValueError: If required parameters are missing.
-         Exception: If there is an error in processing the request.
-     """
+     # Optionally remove any known keys that should not appear (e.g. 'date_extracted')
+     user_properties.pop("date_extracted", None)

      instructions = f"""
      Please read the following user information and instructions, then produce a detailed summary of the lead in the specified format.
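As a quick reference (not part of the diff), the exclusion rules of the new `_remove_excluded_fields` helper behave like this on a typical lead record:

```python
from dhisana.utils.research_lead import _remove_excluded_fields

lead = {
    "id": "123",                      # dropped: key is exactly 'id'
    "created_at": "2024-01-01",       # dropped: ends with '_at'
    "owner_id": "u-42",               # dropped: ends with '_id'
    "email_status": "valid",          # dropped: ends with '_status'
    "research_summary": "old notes",  # dropped: listed ending
    "full_name": "Ada Lovelace",      # kept
    "job_title": "CTO",               # kept
}
print(_remove_excluded_fields(lead))
# {'full_name': 'Ada Lovelace', 'job_title': 'CTO'}
```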
@@ -105,6 +69,12 @@ async def research_lead_with_full_info_ai(user_properties: dict, instructions:st
      8. Connections
      9. Current Company Information
      10. Contact Information
+     11. Addtional Info:
+         a. Include any githbub information like handle, repositories owned etc if present.
+         b. Include any twitter information like handle, followers etc if present.
+         c. Includ any youtube channel information like handle, subscribers etc if present.
+         d. Include any other social media information like handle, followers etc if present.
+

      - In the **About** section, create a clear, concise description of the lead that can be used for sales prospecting.
      - In the **Current Company Information** section, summarize what the lead’s current company does.
@@ -117,7 +87,90 @@ async def research_lead_with_full_info_ai(user_properties: dict, instructions:st
      {{
          "research_summary": "Detailed summary about lead. The summary should be neatly formatted in GitHub-Flavored Markdown, and include all the key information from the listed sections."
      }}
+     """
+     response, status = await get_structured_output_internal(
+         instructions,
+         LeadResearchInformation,
+         model="gpt-5.1-chat",
+         tool_config=tool_config
+     )
+     if status == "SUCCESS":
+         response.research_summary = clean_nul_bytes(response.research_summary)
+         return response.model_dump()
+     else:
+         return {"research_summary": ""}
+
+ # --------------------------------------------
+ # COMPANY-RELATED MODELS & FUNCTION (FIXED)
+ # --------------------------------------------
+ class CompanyResearchInformation(BaseModel):
+     research_summary: str
+
+ @assistant_tool
+ async def research_company_with_full_info_ai(
+     company_properties: dict,
+     instructions: str,
+     tool_config: Optional[List[Dict]] = None
+ ):
+     """
+     Research on company provided given input. Provide a Detailed Summary.
+
+     Parameters:
+         company_properties (dict): Information about the company.
+         instructions (str): Additional instructions for generating the detailed summary.
+         tool_config (Optional[List[Dict]]): Configuration for the tool (default is None).
+
+     Returns:
+         dict: The JSON response containing the detailed research summary of the company.
+     """
+     # Clean company properties (e.g. remove newlines, sanitize strings, etc.)
+     company_properties = cleanup_email_context(company_properties)
+
+     # Remove excluded fields from company_properties
+     company_properties = _remove_excluded_fields(company_properties)
+
+     instructions = f"""
+     Please read the following company information and instructions, then produce a detailed summary of the company in the specified format.
+     ---
+     Company Data include name, domain and website:
+     {company_properties}

+     Instructions:
+     {instructions}
+     ---
+
+     **Task**:
+     Give a short summary of the company based on the provided data. Include **firmographic details** if they are present.
+     The summary should have the following sections (only include them if there is relevant data):
+
+     1. About Company
+     2. Industry
+     3. Location / HQ
+     4. Employee Headcount
+     5. Revenue
+     6. Funding Information
+     7. Additional Firmographics (e.g. markets, expansions, or any other relevant data)
+
+     - In the **About Company** section, create a clear, concise description of what the company does (suitable for sales prospecting).
+     - Do not include any IDs, userIds, or GUIDs in the output.
+     - Have the above section headers even if section content is empty.
+     Use web search to find additional information about the company using company name and domain. Search what it does, news, and funding.
+
+     **Output**:
+     Return your final output as valid JSON with the following structure:
+     {{
+         "research_summary": "Detailed summary about the company. The summary should be neatly formatted in GitHub-Flavored Markdown, and include all the key information from the listed sections."
+     }}
      """
-     response, status = await get_structured_output_internal(instructions, LeadResearchInformation, tool_config=tool_config)
-     return response.model_dump()
+     response, status = await get_structured_output_internal(
+         instructions,
+         CompanyResearchInformation,
+         model="gpt-5.1-chat",
+         use_web_search=False,
+         tool_config=tool_config
+     )
+     if status == "SUCCESS":
+         response.research_summary = clean_nul_bytes(response.research_summary)
+         return response.model_dump()
+     else:
+         return {"research_summary": ""}
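An end-to-end sketch of the reworked research tools (not part of the diff). The module path comes from the file list above; `tool_config=None` is only a placeholder, since in practice it must carry whatever credentials `get_structured_output_internal` needs, and the lead and company records here are invented for illustration.

```python
import asyncio

from dhisana.utils.research_lead import (
    research_company_with_full_info_ai,
    research_lead_with_full_info_ai,
)

async def main() -> None:
    lead = {
        "full_name": "Ada Lovelace",
        "job_title": "CTO",
        "organization_name": "Acme Analytics",
        "user_linkedin_url": "https://www.linkedin.com/in/ada-example/",
    }
    company = {"name": "Acme Analytics", "domain": "acme-analytics.example"}

    lead_summary = await research_lead_with_full_info_ai(
        lead, instructions="Focus on data-platform experience.", tool_config=None
    )
    company_summary = await research_company_with_full_info_ai(
        company, instructions="Highlight funding and headcount.", tool_config=None
    )
    print(lead_summary["research_summary"])
    print(company_summary["research_summary"])

asyncio.run(main())
```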
dhisana/utils/sales_navigator_crawler.py
@@ -3,21 +3,16 @@
  # Executes the tasks and sends the results back to the service.

  import asyncio
- from datetime import datetime, timedelta
+ from datetime import datetime
  import json
  import os
  import logging
  import re
  from typing import List, Dict, Any
  import html2text
- from pydantic import BaseModel, Field
  from playwright.async_api import async_playwright, Page
- import pandas as pd
  import requests  # or aiohttp if you prefer async calls

- from dhisana.utils.assistant_tool_tag import assistant_tool
- from dhisana.utils.dataframe_tools import get_structured_output
- from dhisana.utils.web_download_parse_tools import parse_html_content_as_text
  import asyncio
  import logging
  import pyperclip