dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. dhisana/schemas/common.py +10 -1
  2. dhisana/schemas/sales.py +203 -22
  3. dhisana/utils/add_mapping.py +0 -2
  4. dhisana/utils/apollo_tools.py +739 -119
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/check_email_validity_tools.py +35 -18
  7. dhisana/utils/check_for_intent_signal.py +1 -2
  8. dhisana/utils/check_linkedin_url_validity.py +34 -8
  9. dhisana/utils/clay_tools.py +3 -2
  10. dhisana/utils/clean_properties.py +1 -4
  11. dhisana/utils/compose_salesnav_query.py +0 -1
  12. dhisana/utils/compose_search_query.py +7 -3
  13. dhisana/utils/composite_tools.py +0 -1
  14. dhisana/utils/dataframe_tools.py +2 -2
  15. dhisana/utils/email_body_utils.py +72 -0
  16. dhisana/utils/email_provider.py +174 -35
  17. dhisana/utils/enrich_lead_information.py +183 -53
  18. dhisana/utils/fetch_openai_config.py +129 -0
  19. dhisana/utils/field_validators.py +1 -1
  20. dhisana/utils/g2_tools.py +0 -1
  21. dhisana/utils/generate_content.py +0 -1
  22. dhisana/utils/generate_email.py +68 -23
  23. dhisana/utils/generate_email_response.py +294 -46
  24. dhisana/utils/generate_flow.py +0 -1
  25. dhisana/utils/generate_linkedin_connect_message.py +9 -2
  26. dhisana/utils/generate_linkedin_response_message.py +137 -66
  27. dhisana/utils/generate_structured_output_internal.py +317 -164
  28. dhisana/utils/google_custom_search.py +150 -44
  29. dhisana/utils/google_oauth_tools.py +721 -0
  30. dhisana/utils/google_workspace_tools.py +278 -54
  31. dhisana/utils/hubspot_clearbit.py +3 -1
  32. dhisana/utils/hubspot_crm_tools.py +718 -272
  33. dhisana/utils/instantly_tools.py +3 -1
  34. dhisana/utils/lusha_tools.py +10 -7
  35. dhisana/utils/mailgun_tools.py +150 -0
  36. dhisana/utils/microsoft365_tools.py +447 -0
  37. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  38. dhisana/utils/openai_helpers.py +8 -6
  39. dhisana/utils/parse_linkedin_messages_txt.py +1 -3
  40. dhisana/utils/profile.py +37 -0
  41. dhisana/utils/proxy_curl_tools.py +377 -76
  42. dhisana/utils/proxycurl_search_leads.py +426 -0
  43. dhisana/utils/research_lead.py +3 -3
  44. dhisana/utils/sales_navigator_crawler.py +1 -6
  45. dhisana/utils/salesforce_crm_tools.py +323 -50
  46. dhisana/utils/search_router.py +131 -0
  47. dhisana/utils/search_router_jobs.py +51 -0
  48. dhisana/utils/sendgrid_tools.py +126 -91
  49. dhisana/utils/serarch_router_local_business.py +75 -0
  50. dhisana/utils/serpapi_additional_tools.py +290 -0
  51. dhisana/utils/serpapi_google_jobs.py +117 -0
  52. dhisana/utils/serpapi_google_search.py +188 -0
  53. dhisana/utils/serpapi_local_business_search.py +129 -0
  54. dhisana/utils/serpapi_search_tools.py +360 -432
  55. dhisana/utils/serperdev_google_jobs.py +125 -0
  56. dhisana/utils/serperdev_local_business.py +154 -0
  57. dhisana/utils/serperdev_search.py +233 -0
  58. dhisana/utils/smtp_email_tools.py +178 -18
  59. dhisana/utils/test_connect.py +1603 -130
  60. dhisana/utils/trasform_json.py +3 -3
  61. dhisana/utils/web_download_parse_tools.py +0 -1
  62. dhisana/utils/zoominfo_tools.py +2 -3
  63. dhisana/workflow/test.py +1 -1
  64. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
  65. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  66. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  67. dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
  68. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  69. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/proxycurl_search_leads.py
@@ -0,0 +1,426 @@
+ import json
+ import logging
+ from typing import Any, Dict, List, Optional
+
+ import aiohttp
+ from pydantic import BaseModel
+
+ from dhisana.utils.generate_structured_output_internal import get_structured_output_internal
+ from dhisana.utils.proxy_curl_tools import (
+     get_proxycurl_access_token,
+     fill_in_missing_properties,
+     transform_company_data,
+ )
+ from dhisana.utils.cache_output_tools import cache_output
+ from urllib.parse import urlparse, urlunparse
+ from dhisana.utils.clean_properties import cleanup_properties
+ from dhisana.utils.assistant_tool_tag import assistant_tool
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ # ────────────────────────────
+ # 🛠 Small generic helpers
+ # ────────────────────────────
+ def _remove_empty_values(d: Dict[str, Any]) -> Dict[str, Any]:
+     """Return a copy of *d* without keys whose value is empty, None, or zero for integers."""
+     cleaned = {}
+     for k, v in d.items():
+         # Skip None values
+         if v is None:
+             continue
+         # Skip empty strings or whitespace-only strings
+         elif isinstance(v, str) and v.strip() == "":
+             continue
+         # Skip empty lists/arrays
+         elif isinstance(v, list) and len(v) == 0:
+             continue
+         # Skip zero values for integer fields (assuming they're not meaningful for search)
+         elif isinstance(v, int) and v == 0:
+             continue
+         # Keep all other values
+         else:
+             cleaned[k] = v
+     return cleaned
+
+
+ def _build_common_params(
+     search_params: BaseModel,
+     max_entries: int,
+     enrich_profiles: bool,
+ ) -> Dict[str, Any]:
+     """Convert a Pydantic model into Proxycurl query params, removing empty/None values."""
+     params = search_params.model_dump(exclude_none=True)
+     params = _remove_empty_values(params)
+
+     params["page_size"] = max_entries if max_entries > 0 else 5
+     params["enrich_profiles"] = "enrich" if enrich_profiles else "skip"
+     params["use_cache"] = "if-present"
+     return params
+
+
+ # ────────────────────────────
+ # 📄 Search parameter schemas
+ # ────────────────────────────
+ class PeopleSearchParams(BaseModel):
+     current_role_title: Optional[str] = None
+     current_company_industry: Optional[str] = None
+     current_company_employee_count_min: Optional[int] = None
+     current_company_employee_count_max: Optional[int] = None
+     country: Optional[str] = None
+     region: Optional[str] = None
+     city: Optional[str] = None
+     summary: Optional[str] = None
+     current_job_description: Optional[str] = None
+     past_job_description: Optional[str] = None
+
+
+ class CompanySearchParams(BaseModel):
+     country: Optional[str] = None
+     region: Optional[str] = None
+     city: Optional[str] = None
+     type: Optional[str] = None
+     follower_count_min: Optional[int] = None
+     follower_count_max: Optional[int] = None
+     name: Optional[str] = None
+     industry: Optional[str] = None
+     employee_count_max: Optional[int] = None
+     employee_count_min: Optional[int] = None
+     description: Optional[str] = None
+     founded_after_year: Optional[int] = None
+     founded_before_year: Optional[int] = None
+     funding_amount_max: Optional[int] = None
+     funding_amount_min: Optional[int] = None
+     funding_raised_after: Optional[str] = None
+     funding_raised_before: Optional[str] = None
+     public_identifier_in_list: Optional[str] = None
+     public_identifier_not_in_list: Optional[str] = None
+
+
+ class JobSearchParams(BaseModel):
+     job_type: Optional[str] = None
+     experience_level: Optional[str] = None
+     when: Optional[str] = None
+     flexibility: Optional[str] = None
+     geo_id: Optional[int] = None
+     keyword: Optional[str] = None
+     search_id: Optional[str] = None
+
+
+ # ────────────────────────────
+ # 👤 People search
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_people_search_leads(
+     search_params: PeopleSearchParams,
+     max_entries: int = 5,
+     enrich_profiles: bool = False,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> List[Dict[str, Any]]:
+     """Search for leads on Proxycurl based on a plain‑English ICP description."""
+
+     params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return []
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/search/person"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl search error %s", resp.status)
+                     return []
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl search: %s", exc)
+         return []
+
+     leads: List[Dict[str, Any]] = []
+     for item in (data.get("results") or [])[:max_entries]:
+         lead: Dict[str, Any] = {
+             "user_linkedin_url": item.get("linkedin_profile_url"),
+         }
+         profile = item.get("profile") or {}
+         if profile:
+             # Fill lead fields using profile data
+             lead = fill_in_missing_properties(lead, profile)
+             first_exp = (profile.get("experiences") or [{}])[0]
+             lead.setdefault("organization_name", first_exp.get("company", ""))
+             lead.setdefault(
+                 "organization_linkedin_url",
+                 first_exp.get("company_linkedin_profile_url", ""),
+             )
+
+             additional_props = lead.get("additional_properties") or {}
+             additional_props["pc_person_data"] = json.dumps(
+                 cleanup_properties(profile)
+             )
+             lead["additional_properties"] = additional_props
+
+             linkedin_url = lead.get("user_linkedin_url")
+             if linkedin_url:
+                 cache_output(
+                     "enrich_person_info_from_proxycurl", linkedin_url, profile
+                 )
+
+         if cleaned := cleanup_properties(lead):
+             leads.append(cleaned)
+
+     return leads
+
+
+ # ────────────────────────────
+ # 🏢 Company search
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_company_search_leads(
+     search_params: CompanySearchParams,
+     max_entries: int = 5,
+     enrich_profiles: bool = False,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> List[Dict[str, Any]]:
+     """Search for companies on Proxycurl based on given parameters."""
+
+     params = _build_common_params(search_params, max_entries, enrich_profiles)
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return []
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/search/company"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl company search error %s", resp.status)
+                     return []
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl company search: %s", exc)
+         return []
+
+     companies: List[Dict[str, Any]] = []
+     for item in (data.get("results") or [])[:max_entries]:
+         company: Dict[str, Any] = {
+             "organization_linkedin_url": item.get("linkedin_profile_url"),
+         }
+         profile = item.get("profile") or {}
+         if profile:
+             # Copy mapped properties from the enriched profile
+             transformed = transform_company_data(profile)
+             company.update(transformed)
+
+             # Store the raw profile JSON for reference
+             additional_props = company.get("additional_properties") or {}
+             additional_props["pc_company_data"] = json.dumps(
+                 cleanup_properties(profile)
+             )
+             company["additional_properties"] = additional_props
+
+             linkedin_url = company.get("organization_linkedin_url") or ""
+             if linkedin_url and "linkedin.com/company" in linkedin_url:
+                 parsed_url = urlparse(linkedin_url)
+                 if parsed_url.netloc != "www.linkedin.com":
+                     standardized_netloc = "www.linkedin.com"
+                     standardized_path = parsed_url.path
+                     if not standardized_path.startswith("/company/"):
+                         standardized_path = "/company" + standardized_path
+                     standardized_url = urlunparse(
+                         parsed_url._replace(
+                             netloc=standardized_netloc,
+                             path=standardized_path,
+                         )
+                     )
+                 else:
+                     standardized_url = linkedin_url
+                 if standardized_url and not standardized_url.endswith("/"):
+                     standardized_url += "/"
+                 cache_output(
+                     "enrich_organization_info_from_proxycurl",
+                     standardized_url,
+                     transformed,
+                 )
+
+         if cleaned := cleanup_properties(company):
+             companies.append(cleaned)
+
+     return companies
+
+
+ # ────────────────────────────
+ # 💼 Job search
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_job_search(
+     search_params: JobSearchParams,
+     max_entries: int = 5,
+     enrich_profiles: bool = False,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> List[Dict[str, Any]]:
+     """List jobs posted by a company using Proxycurl's job search API."""
+
+     # Job search endpoint does not support enrich_profiles
+     params = _build_common_params(search_params, max_entries, enrich_profiles=enrich_profiles)
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return []
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/company/job"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl job search error %s", resp.status)
+                     return []
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl job search: %s", exc)
+         return []
+
+     job_entries: List[Dict[str, Any]] = []
+     for item in (data.get("job") or data.get("jobs") or [])[:max_entries]:
+         job: Dict[str, Any] = {
+             "organization_name": item.get("company"),
+             "organization_linkedin_url": item.get("company_url"),
+             "job_title": item.get("job_title"),
+             "job_posting_url": item.get("job_url"),
+             "list_date": item.get("list_date"),
+             "location": item.get("location"),
+         }
+         additional_props = job.get("additional_properties") or {}
+         additional_props["pc_job_data"] = json.dumps(item)
+         job["additional_properties"] = additional_props
+
+         job_url = job.get("job_posting_url")
+         if job_url:
+             cache_output("enrich_job_info_from_proxycurl", job_url, item)
+         if cleaned := cleanup_properties(job):
+             job_entries.append(cleaned)
+
+     return job_entries
+
+
+ # ────────────────────────────
+ # 📊 Job count
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_job_count(
+     search_params: JobSearchParams,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> Dict[str, Any]:
+     """Get the count of jobs posted by a company using Proxycurl's job count API."""
+
+     # Job count endpoint does not support enrich_profiles or max_entries
+     params = search_params.model_dump(exclude_none=True)
+     params = _remove_empty_values(params)
+
+     # Job count endpoint doesn't need page_size or enrich_profiles
+     if "page_size" in params:
+         del params["page_size"]
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return {"count": 0}
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/company/job/count"
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl job count error %s", resp.status)
+                     return {"count": 0}
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl job count: %s", exc)
+         return {"count": 0}
+
+     return {"count": data.get("count", 0)}
+
+
+ # ────────────────────────────
+ # 🔍 Company Profile - Get Search ID
+ # ────────────────────────────
+ @assistant_tool
+ async def proxycurl_get_company_search_id(
+     company_url: str,
+     tool_config: Optional[List[Dict[str, Any]]] = None,
+ ) -> Dict[str, Any]:
+     """Get a company's search ID using Proxycurl's Company Profile endpoint.
+
+     The search_id is required for other Proxycurl endpoints like job search and job count.
+
+     Args:
+         company_url: LinkedIn company profile URL (e.g., "https://www.linkedin.com/company/microsoft/")
+         tool_config: Optional tool configuration containing API key
+
+     Returns:
+         Dictionary containing search_id and basic company info, or error info if failed
+     """
+
+     try:
+         api_key = get_proxycurl_access_token(tool_config)
+     except ValueError as e:
+         logger.error(str(e))
+         return {"error": str(e), "search_id": None}
+
+     headers = {"Authorization": f"Bearer {api_key}"}
+     url = "https://enrichlayer.com/api/v2/company"
+
+     params = {
+         "url": company_url,
+         "use_cache": "if-present",
+         "fallback_to_cache": "on-error"
+     }
+
+     try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, headers=headers, params=params) as resp:
+                 if resp.status != 200:
+                     logger.error("Proxycurl company profile error %s", resp.status)
+                     return {"error": f"HTTP {resp.status}", "search_id": None}
+                 data = await resp.json()
+     except Exception as exc:
+         logger.exception("Exception during Proxycurl company profile lookup: %s", exc)
+         return {"error": str(exc), "search_id": None}
+
+     # Extract the key information
+     search_id = data.get("search_id")
+     name = data.get("name")
+     linkedin_internal_id = data.get("linkedin_internal_id")
+     industry = data.get("industry")
+
+     result = {
+         "search_id": search_id,
+         "name": name,
+         "linkedin_internal_id": linkedin_internal_id,
+         "industry": industry,
+         "company_url": company_url
+     }
+
+     if search_id:
+         logger.info(f"Successfully retrieved search_id '{search_id}' for company '{name}'")
+     else:
+         logger.warning(f"No search_id found for company at {company_url}")
+         result["error"] = "No search_id found in response"
+
+     return result
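
For orientation, a minimal usage sketch of the new people-search tool follows (illustrative only, not part of the package diff). It assumes the module is importable as dhisana.utils.proxycurl_search_leads and that a Proxycurl/EnrichLayer API key can be resolved by get_proxycurl_access_token; the exact tool_config payload is defined in dhisana.utils.proxy_curl_tools and is not shown in this diff.

import asyncio

from dhisana.utils.proxycurl_search_leads import (
    PeopleSearchParams,
    proxycurl_people_search_leads,
)


async def main() -> None:
    # Empty/zero fields are dropped by _build_common_params before the request.
    params = PeopleSearchParams(
        current_role_title="VP of Sales",
        current_company_industry="Computer Software",
        country="United States",
    )
    # How the Proxycurl/EnrichLayer key is supplied via tool_config is defined in
    # dhisana.utils.proxy_curl_tools and is an assumption here; if no usable key
    # is found, the tool logs an error and returns an empty list.
    leads = await proxycurl_people_search_leads(
        search_params=params,
        max_entries=5,
        enrich_profiles=False,
        tool_config=None,
    )
    for lead in leads:
        print(lead.get("user_linkedin_url"), lead.get("organization_name"))


if __name__ == "__main__":
    asyncio.run(main())
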
dhisana/utils/research_lead.py
@@ -91,7 +91,7 @@ async def research_lead_with_full_info_ai(
      response, status = await get_structured_output_internal(
          instructions,
          LeadResearchInformation,
-         model="gpt-4.1-mini",
+         model="gpt-5.1-chat",
          tool_config=tool_config
      )
      if status == "SUCCESS":
@@ -165,8 +165,8 @@ async def research_company_with_full_info_ai(
      response, status = await get_structured_output_internal(
          instructions,
          CompanyResearchInformation,
-         model="gpt-4.1-mini",
-         use_web_search=True,
+         model="gpt-5.1-chat",
+         use_web_search=False,
          tool_config=tool_config
      )
      if status == "SUCCESS":
dhisana/utils/sales_navigator_crawler.py
@@ -3,21 +3,16 @@
  # Executes the tasks and sends the results back to the service.
  
  import asyncio
- from datetime import datetime, timedelta
+ from datetime import datetime
  import json
  import os
  import logging
  import re
  from typing import List, Dict, Any
  import html2text
- from pydantic import BaseModel, Field
  from playwright.async_api import async_playwright, Page
- import pandas as pd
  import requests  # or aiohttp if you prefer async calls
  
- from dhisana.utils.assistant_tool_tag import assistant_tool
- from dhisana.utils.dataframe_tools import get_structured_output
- from dhisana.utils.web_download_parse_tools import parse_html_content_as_text
  import asyncio
  import logging
  import pyperclip