dhisana 0.0.1.dev243__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. dhisana/__init__.py +1 -0
  2. dhisana/cli/__init__.py +1 -0
  3. dhisana/cli/cli.py +20 -0
  4. dhisana/cli/datasets.py +27 -0
  5. dhisana/cli/models.py +26 -0
  6. dhisana/cli/predictions.py +20 -0
  7. dhisana/schemas/__init__.py +1 -0
  8. dhisana/schemas/common.py +399 -0
  9. dhisana/schemas/sales.py +965 -0
  10. dhisana/ui/__init__.py +1 -0
  11. dhisana/ui/components.py +472 -0
  12. dhisana/utils/__init__.py +1 -0
  13. dhisana/utils/add_mapping.py +352 -0
  14. dhisana/utils/agent_tools.py +51 -0
  15. dhisana/utils/apollo_tools.py +1597 -0
  16. dhisana/utils/assistant_tool_tag.py +4 -0
  17. dhisana/utils/built_with_api_tools.py +282 -0
  18. dhisana/utils/cache_output_tools.py +98 -0
  19. dhisana/utils/cache_output_tools_local.py +78 -0
  20. dhisana/utils/check_email_validity_tools.py +717 -0
  21. dhisana/utils/check_for_intent_signal.py +107 -0
  22. dhisana/utils/check_linkedin_url_validity.py +209 -0
  23. dhisana/utils/clay_tools.py +43 -0
  24. dhisana/utils/clean_properties.py +135 -0
  25. dhisana/utils/company_utils.py +60 -0
  26. dhisana/utils/compose_salesnav_query.py +259 -0
  27. dhisana/utils/compose_search_query.py +759 -0
  28. dhisana/utils/compose_three_step_workflow.py +234 -0
  29. dhisana/utils/composite_tools.py +137 -0
  30. dhisana/utils/dataframe_tools.py +237 -0
  31. dhisana/utils/domain_parser.py +45 -0
  32. dhisana/utils/email_body_utils.py +72 -0
  33. dhisana/utils/email_parse_helpers.py +132 -0
  34. dhisana/utils/email_provider.py +375 -0
  35. dhisana/utils/enrich_lead_information.py +933 -0
  36. dhisana/utils/extract_email_content_for_llm.py +101 -0
  37. dhisana/utils/fetch_openai_config.py +129 -0
  38. dhisana/utils/field_validators.py +426 -0
  39. dhisana/utils/g2_tools.py +104 -0
  40. dhisana/utils/generate_content.py +41 -0
  41. dhisana/utils/generate_custom_message.py +271 -0
  42. dhisana/utils/generate_email.py +278 -0
  43. dhisana/utils/generate_email_response.py +465 -0
  44. dhisana/utils/generate_flow.py +102 -0
  45. dhisana/utils/generate_leads_salesnav.py +303 -0
  46. dhisana/utils/generate_linkedin_connect_message.py +224 -0
  47. dhisana/utils/generate_linkedin_response_message.py +317 -0
  48. dhisana/utils/generate_structured_output_internal.py +462 -0
  49. dhisana/utils/google_custom_search.py +267 -0
  50. dhisana/utils/google_oauth_tools.py +727 -0
  51. dhisana/utils/google_workspace_tools.py +1294 -0
  52. dhisana/utils/hubspot_clearbit.py +96 -0
  53. dhisana/utils/hubspot_crm_tools.py +2440 -0
  54. dhisana/utils/instantly_tools.py +149 -0
  55. dhisana/utils/linkedin_crawler.py +168 -0
  56. dhisana/utils/lusha_tools.py +333 -0
  57. dhisana/utils/mailgun_tools.py +156 -0
  58. dhisana/utils/mailreach_tools.py +123 -0
  59. dhisana/utils/microsoft365_tools.py +455 -0
  60. dhisana/utils/openai_assistant_and_file_utils.py +267 -0
  61. dhisana/utils/openai_helpers.py +977 -0
  62. dhisana/utils/openapi_spec_to_tools.py +45 -0
  63. dhisana/utils/openapi_tool/__init__.py +1 -0
  64. dhisana/utils/openapi_tool/api_models.py +633 -0
  65. dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
  66. dhisana/utils/openapi_tool/openapi_tool.py +319 -0
  67. dhisana/utils/parse_linkedin_messages_txt.py +100 -0
  68. dhisana/utils/profile.py +37 -0
  69. dhisana/utils/proxy_curl_tools.py +1226 -0
  70. dhisana/utils/proxycurl_search_leads.py +426 -0
  71. dhisana/utils/python_function_to_tools.py +83 -0
  72. dhisana/utils/research_lead.py +176 -0
  73. dhisana/utils/sales_navigator_crawler.py +1103 -0
  74. dhisana/utils/salesforce_crm_tools.py +477 -0
  75. dhisana/utils/search_router.py +131 -0
  76. dhisana/utils/search_router_jobs.py +51 -0
  77. dhisana/utils/sendgrid_tools.py +162 -0
  78. dhisana/utils/serarch_router_local_business.py +75 -0
  79. dhisana/utils/serpapi_additional_tools.py +290 -0
  80. dhisana/utils/serpapi_google_jobs.py +117 -0
  81. dhisana/utils/serpapi_google_search.py +188 -0
  82. dhisana/utils/serpapi_local_business_search.py +129 -0
  83. dhisana/utils/serpapi_search_tools.py +852 -0
  84. dhisana/utils/serperdev_google_jobs.py +125 -0
  85. dhisana/utils/serperdev_local_business.py +154 -0
  86. dhisana/utils/serperdev_search.py +233 -0
  87. dhisana/utils/smtp_email_tools.py +582 -0
  88. dhisana/utils/test_connect.py +2087 -0
  89. dhisana/utils/trasform_json.py +173 -0
  90. dhisana/utils/web_download_parse_tools.py +189 -0
  91. dhisana/utils/workflow_code_model.py +5 -0
  92. dhisana/utils/zoominfo_tools.py +357 -0
  93. dhisana/workflow/__init__.py +1 -0
  94. dhisana/workflow/agent.py +18 -0
  95. dhisana/workflow/flow.py +44 -0
  96. dhisana/workflow/task.py +43 -0
  97. dhisana/workflow/test.py +90 -0
  98. dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
  99. dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
  100. dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
  101. dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
  102. dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1597 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import re
6
+ import aiohttp
7
+ import backoff
8
+
9
+ from dhisana.schemas.sales import LeadsQueryFilters, CompanyQueryFilters
10
+ from dhisana.utils.assistant_tool_tag import assistant_tool
11
+ from urllib.parse import urlparse, parse_qs
12
+ from typing import Any, Dict, List, Optional, Tuple, Union
13
+
14
+ from dhisana.utils.clean_properties import cleanup_properties
15
+
16
+ logging.basicConfig(level=logging.INFO)
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> Tuple[str, bool]:
    """
    Retrieves an Apollo access token from tool configuration or environment variables.

    Lookup order (the first non-empty value wins):
      1. ``oauth_tokens`` in the "apollo" tool config (a dict, or a JSON string
         that decodes to a dict) -> its ``access_token`` or ``token`` entry.
      2. ``access_token`` in the "apollo" tool config.
      3. ``apiKey`` / ``api_key`` in the "apollo" tool config.
      4. The ``APOLLO_ACCESS_TOKEN`` environment variable.
      5. The ``APOLLO_API_KEY`` environment variable.

    Args:
        tool_config (list): Optional tool configuration payload provided to the tool.
            Entries that are not dicts, and configuration items without a
            ``name`` key, are ignored instead of raising.

    Returns:
        Tuple[str, bool]: A tuple containing the token string and a boolean flag indicating
        whether the token represents an OAuth bearer token (``True``) or an API key (``False``).

    Raises:
        ValueError: If the Apollo integration has not been configured.
    """
    token: Optional[str] = None
    is_oauth = False

    if tool_config:
        apollo_config = next(
            (
                item
                for item in tool_config
                if isinstance(item, dict) and item.get("name") == "apollo"
            ),
            None,
        )
        if apollo_config:
            # Skip malformed entries rather than failing the whole lookup.
            config_map = {
                item["name"]: item.get("value")
                for item in (apollo_config.get("configuration") or [])
                if isinstance(item, dict) and "name" in item
            }

            raw_oauth = config_map.get("oauth_tokens")
            if isinstance(raw_oauth, str):
                try:
                    raw_oauth = json.loads(raw_oauth)
                except ValueError:
                    # json.JSONDecodeError subclasses ValueError; an
                    # undecodable blob is treated as "no OAuth tokens".
                    raw_oauth = None
            if isinstance(raw_oauth, dict):
                token = raw_oauth.get("access_token") or raw_oauth.get("token")
                if token:
                    is_oauth = True

            if not token:
                direct_access_token = config_map.get("access_token")
                if direct_access_token:
                    token = direct_access_token
                    is_oauth = True

            if not token:
                api_key = config_map.get("apiKey") or config_map.get("api_key")
                if api_key:
                    token = api_key
                    is_oauth = False
        else:
            logger.warning("No 'apollo' config item found in tool_config.")

    if not token:
        env_oauth_token = os.getenv("APOLLO_ACCESS_TOKEN")
        if env_oauth_token:
            token = env_oauth_token
            is_oauth = True

    if not token:
        env_api_key = os.getenv("APOLLO_API_KEY")
        if env_api_key:
            token = env_api_key
            is_oauth = False

    if not token:
        logger.error("Apollo integration is not configured.")
        raise ValueError(
            "Apollo integration is not configured. Please configure the connection to Apollo in Integrations."
        )

    return token, is_oauth
95
+
96
+
97
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def _request_apollo_person_match(
    headers: Dict[str, str],
    data: Dict[str, Any],
) -> Dict[str, Any]:
    """
    POST ``data`` to Apollo's ``people/match`` endpoint.

    An HTTP 429 is raised as ``aiohttp.ClientResponseError`` so the ``backoff``
    decorator on this helper can retry the request. Any other non-200 status
    is returned as ``{'error': <decoded body>}``.
    """
    url = 'https://api.apollo.io/api/v1/people/match'

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            logger.debug(f"Received response status: {response.status}")
            if response.status == 200:
                result = await response.json()
                logger.info("Successfully retrieved person info from Apollo.")
                return result

            if response.status == 429:
                msg = "Rate limit exceeded"
                logger.warning(msg)
                # Cool-down kept from the original implementation.
                await asyncio.sleep(30)
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message=msg,
                    headers=response.headers
                )

            result = await response.json()
            logger.warning(f"enrich_person_info_from_apollo error: {result}")
            return {'error': result}


@assistant_tool
async def enrich_person_info_from_apollo(
    linkedin_url: Optional[str] = None,
    email: Optional[str] = None,
    phone: Optional[str] = None,
    fetch_valid_phone_number: Optional[bool] = False,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch a person's details from Apollo using LinkedIn URL, email, or phone number.

    The HTTP call is delegated to a private helper that carries the ``backoff``
    retry policy. Previously the rate-limit exception was raised inside this
    function's own ``try``/``except Exception`` and was therefore swallowed
    before ``backoff`` could see it, so no retry ever happened.

    Parameters:
    - **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
    - **email** (*str*, optional): Email address of the person.
    - **phone** (*str*, optional): Phone number of the person.
    - **fetch_valid_phone_number** (*bool*, optional): If True, include phone numbers in the API response. Defaults to False.
    - **tool_config** (*list*, optional): Tool configuration used to resolve the Apollo credentials.

    Returns:
    - **dict**: JSON response containing person information, or ``{'error': ...}``
      when no identifier was given, Apollo answered with a non-200 status, or
      the request failed (including a rate limit that persisted after the retry).

    Raises:
    - **ValueError**: If no Apollo credentials are configured.
    """
    logger.info("Entering enrich_person_info_from_apollo")

    token, is_oauth = get_apollo_access_token(tool_config)

    if not linkedin_url and not email and not phone:
        logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
        return {'error': "At least one of linkedin_url, email, or phone must be provided"}

    headers = {"Content-Type": "application/json"}
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    data: Dict[str, Any] = {}
    if linkedin_url:
        logger.debug(f"LinkedIn URL provided: {linkedin_url}")
        data['linkedin_url'] = linkedin_url
    if email:
        logger.debug(f"Email provided: {email}")
        data['email'] = email
    if phone:
        logger.debug(f"Phone provided: {phone}")
        data['phone_numbers'] = [phone]  # Apollo expects a list for phone numbers

    # Add reveal_phone_number parameter if fetch_valid_phone_number is True
    if fetch_valid_phone_number:
        logger.debug("fetch_valid_phone_number flag is True, including phone numbers in API response")
        data['reveal_phone_number'] = True

    try:
        return await _request_apollo_person_match(headers, data)
    except Exception as e:
        # Boundary handler: callers of this tool expect an error dict, never an exception.
        logger.exception("Exception occurred while fetching person info from Apollo.")
        return {'error': str(e)}
182
+
183
+
184
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def _request_apollo_person_search_by_name(
    headers: Dict[str, str],
    data: Dict[str, Any],
) -> Dict[str, Any]:
    """
    POST ``data`` to Apollo's ``mixed_people/search`` endpoint.

    An HTTP 429 is raised as ``aiohttp.ClientResponseError`` so the ``backoff``
    decorator on this helper can retry the request. Any other non-200 status
    is returned as ``{'error': <decoded body>}``.
    """
    url = 'https://api.apollo.io/api/v1/mixed_people/search'
    logger.debug(f"Making request to Apollo with payload: {data}")

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            logger.debug(f"Received response status: {response.status}")
            if response.status == 200:
                result = await response.json()
                logger.info("Successfully looked up person by name on Apollo.")
                return result

            if response.status == 429:
                msg = "Rate limit exceeded"
                logger.warning(msg)
                # Cool-down kept from the original implementation.
                await asyncio.sleep(30)
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message=msg,
                    headers=response.headers
                )

            result = await response.json()
            logger.warning(f"lookup_person_in_apollo_by_name error: {result}")
            return {'error': result}


async def lookup_person_in_apollo_by_name(
    full_name: str,
    company_name: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch a person's details from Apollo using their full name and optionally company name.

    The search is a keyword query (``q_keywords``) for the first page of up to
    10 results. The HTTP call is delegated to a private helper that carries
    the ``backoff`` retry policy. Previously the rate-limit exception was
    swallowed by this function's own ``except Exception`` and never retried.

    Parameters:
    - **full_name** (*str*): Full name of the person.
    - **company_name** (*str*, optional): Name of the company where the person works.
    - **tool_config** (*list*, optional): Tool configuration for API keys.

    Returns:
    - **dict**: JSON response containing person information, or ``{'error': ...}``
      when ``full_name`` is empty, Apollo answered with a non-200 status, or
      the request failed.

    Raises:
    - **ValueError**: If no Apollo credentials are configured.
    """
    logger.info("Entering lookup_person_in_apollo_by_name")

    if not full_name:
        logger.warning("No full_name provided.")
        return {'error': "Full name is required"}

    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {"Content-Type": "application/json"}
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    # Construct the query payload
    data = {
        "q_keywords": f"{full_name} {company_name}" if company_name else full_name,
        "page": 1,
        "per_page": 10
    }

    try:
        return await _request_apollo_person_search_by_name(headers, data)
    except Exception as e:
        # Boundary handler: callers expect an error dict, never an exception.
        logger.exception("Exception occurred while looking up person by name.")
        return {'error': str(e)}
256
+
257
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=30,
)
async def _request_apollo_organization_enrich(
    headers: Dict[str, str],
    organization_domain: str,
) -> Dict[str, Any]:
    """
    GET Apollo's ``organizations/enrich`` endpoint for ``organization_domain``.

    The domain is passed through ``params`` so aiohttp URL-encodes it rather
    than having it spliced raw into the query string. An HTTP 429 is raised
    as ``aiohttp.ClientResponseError`` so the ``backoff`` decorator on this
    helper can retry. Any other non-200 status is returned as
    ``{'error': <decoded body>}``.
    """
    url = 'https://api.apollo.io/api/v1/organizations/enrich'
    logger.debug(f"Making GET request to Apollo for organization domain: {organization_domain}")

    async with aiohttp.ClientSession() as session:
        async with session.get(
            url,
            headers=headers,
            params={"domain": organization_domain},
        ) as response:
            logger.debug(f"Received response status: {response.status}")
            if response.status == 200:
                result = await response.json()
                logger.info("Successfully retrieved organization info from Apollo.")
                return result

            if response.status == 429:
                msg = "Rate limit exceeded"
                logger.warning(msg)
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message=msg,
                    headers=response.headers
                )

            result = await response.json()
            logger.warning(f"Error from Apollo while enriching org info: {result}")
            return {'error': result}


@assistant_tool
async def enrich_organization_info_from_apollo(
    organization_domain: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch an organization's details from Apollo using the organization domain.

    The HTTP call is delegated to a private helper that carries the ``backoff``
    retry policy. Previously the rate-limit exception was swallowed by this
    function's own ``except Exception`` and never retried.

    Parameters:
    - **organization_domain** (*str*, optional): Domain of the organization.
    - **tool_config** (*list*, optional): Tool configuration used to resolve the Apollo credentials.

    Returns:
    - **dict**: JSON response containing organization information, or
      ``{'error': ...}`` when no domain was given, Apollo answered with a
      non-200 status, or the request failed.

    Raises:
    - **ValueError**: If no Apollo credentials are configured.
    """
    logger.info("Entering enrich_organization_info_from_apollo")

    token, is_oauth = get_apollo_access_token(tool_config)

    if not organization_domain:
        logger.warning("No organization domain provided.")
        return {'error': "organization domain must be provided"}

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "accept": "application/json"
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    try:
        return await _request_apollo_organization_enrich(headers, organization_domain)
    except Exception as e:
        # Boundary handler: callers of this tool expect an error dict, never an exception.
        logger.exception("Exception occurred while fetching organization info from Apollo.")
        return {'error': str(e)}
324
+
325
+
326
+
327
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=5,
    giveup=lambda e: e.status != 429,
    factor=2,
)
async def fetch_apollo_data(session, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    POST ``payload`` as JSON to ``url`` on the given session and return the decoded body.

    - HTTP 200: the decoded JSON body is returned.
    - HTTP 429: an ``aiohttp.ClientResponseError`` is raised; the ``backoff``
      decorator retries on that status only (``giveup`` rejects every other
      status) for up to 5 attempts.
    - Any other status: ``response.raise_for_status()`` is called. If that
      call does not raise, the function falls through and returns ``None``.
    """
    logger.info("Entering fetch_apollo_data")
    logger.debug("Making POST request to Apollo.")

    async with session.post(url, headers=headers, json=payload) as resp:
        status_code = resp.status
        logger.debug(f"Received response status: {status_code}")

        if status_code == 200:
            body = await resp.json()
            logger.info("Successfully fetched data from Apollo.")
            return body

        if status_code != 429:
            logger.error(f"Unexpected status code {status_code} from Apollo. Raising exception.")
            resp.raise_for_status()
            return None

        rate_limit_message = "Rate limit exceeded"
        logger.warning(rate_limit_message)
        raise aiohttp.ClientResponseError(
            request_info=resp.request_info,
            history=resp.history,
            status=status_code,
            message=rate_limit_message,
            headers=resp.headers,
        )
356
+
357
+
358
async def search_people_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Run a single-page Apollo ``mixed_people/search`` request.

    Args:
        tool_config: Optional tool configuration used to resolve the Apollo credentials.
        dynamic_payload: Request body sent to Apollo as-is. When empty or
            ``None`` no request is made.

    Returns:
        The ``people`` entries followed by the ``contacts`` entries of the
        response, or an empty list when there is no payload or no data.
        A key that is missing or ``null`` in the response counts as empty.

    Raises:
        ValueError: If no Apollo credentials are configured.
        aiohttp.ClientResponseError: Propagated from ``fetch_apollo_data``.
    """
    logger.info("Entering search_people_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/mixed_people/search"
    logger.info(f"Sending payload to Apollo (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        data = await fetch_apollo_data(session, url, headers, dynamic_payload)
        if not data:
            logger.error("No data returned from Apollo.")
            return []

        # `or []` also covers a key that is present with a JSON null value;
        # `.get(key, [])` would return None there and break the concatenation.
        people = data.get("people") or []
        contacts = data.get("contacts") or []
        return people + contacts
390
+
391
def fill_in_properties_with_preference(input_user_properties: dict, person_data: dict) -> dict:
    """
    Fill gaps in ``input_user_properties`` from an Apollo person record.

    For each property:
      - If input_user_properties already has a non-empty value, keep it.
      - Otherwise, take the value from person_data if available.

    ``email`` and ``phone`` are always present afterwards (``""`` when
    neither side has a value). An email on the placeholder domain
    ``domain.com`` is blanked. The domain is compared exactly, so real
    addresses such as ``jane@mydomain.com`` are kept.

    Args:
        input_user_properties: Properties already known for the lead.
            Updated in place.
        person_data: Person record to read from; the keys read are ``name``,
            ``first_name``, ``last_name``, ``email``, ``contact``,
            ``linkedin_url``, ``organization``, ``title``, ``headline``,
            ``city`` and ``state``.

    Returns:
        The same ``input_user_properties`` dict, after updating.
    """

    def is_empty(value):
        """Returns True if the value is None, empty string, or only whitespace."""
        return value is None or (isinstance(value, str) and not value.strip())

    def fill_if_missing(target_key, candidate):
        """Store ``candidate`` under ``target_key`` only when it is truthy and the slot is empty."""
        if candidate and is_empty(input_user_properties.get(target_key)):
            input_user_properties[target_key] = candidate

    # Existing input values win; Apollo data only fills the gaps.
    fill_if_missing("full_name", person_data.get("name"))
    fill_if_missing("first_name", person_data.get("first_name"))
    fill_if_missing("last_name", person_data.get("last_name"))

    # Email / phone: always leave the key present, normalising None to "".
    if is_empty(input_user_properties.get("email")):
        input_user_properties["email"] = person_data.get("email") or ""

    if is_empty(input_user_properties.get("phone")):
        # person_data["contact"] might be missing or None
        contact = person_data.get("contact") or {}
        input_user_properties["phone"] = contact.get("sanitized_phone") or ""

    fill_if_missing("user_linkedin_url", person_data.get("linkedin_url"))

    # Organization data
    org_data = person_data.get("organization") or {}
    if org_data:
        fill_if_missing("primary_domain_of_organization", org_data.get("primary_domain"))
        fill_if_missing("organization_name", org_data.get("name"))
        fill_if_missing("organization_linkedin_url", org_data.get("linkedin_url"))
        fill_if_missing("organization_website", org_data.get("website_url"))

        keywords = org_data.get("keywords")
        if keywords:
            # A plain string is used as-is; joining it would split it per character.
            if isinstance(keywords, str):
                keywords_text = keywords
            else:
                keywords_text = ", ".join(str(keyword) for keyword in keywords)
            fill_if_missing("keywords", keywords_text)

    fill_if_missing("job_title", person_data.get("title"))
    fill_if_missing("headline", person_data.get("headline"))
    # The headline doubles as the lead summary when none is set yet.
    fill_if_missing("summary_about_lead", person_data.get("headline"))

    # City/State -> lead_location (avoid literal "None")
    location_parts = []
    for value in (person_data.get("city"), person_data.get("state")):
        if value is None:
            continue
        text = str(value).strip()
        if text and text.lower() != "none":
            location_parts.append(text)
    fill_if_missing("lead_location", ", ".join(location_parts))

    # Filter out placeholder emails (exact domain match, not substring).
    email_value = input_user_properties.get("email")
    if isinstance(email_value, str):
        email_domain = email_value.rsplit("@", 1)[-1].strip().lower()
        if email_domain == "domain.com":
            input_user_properties["email"] = ""

    return input_user_properties
485
+
486
+
487
# Apollo web-UI query parameter names -> API payload keys.
# You can augment this mapping if you have more custom fields.
_APOLLO_EXAMPLE_URL_KEY_MAPPING: Dict[str, str] = {
    "personLocations": "person_locations",
    "organizationNumEmployeesRanges": "organization_num_employees_ranges",
    "personTitles": "person_titles",
    "personNotTitles": "person_not_titles",
    "qOrganizationJobTitles": "q_organization_job_titles",
    "sortAscending": "sort_ascending",
    "sortByField": "sort_by_field",
    "contactEmailStatusV2": "contact_email_status",
    "searchSignalIds": "search_signal_ids",
    "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
    "revenueRange[max]": "revenue_range_max",
    "revenueRange[min]": "revenue_range_min",
    "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
    "organizationIndustryTagIds": "organization_industry_tag_ids",
    "notOrganizationIds": "not_organization_ids",
}

# Payload keys that are sent as lists even when the URL carries one value.
_APOLLO_EXAMPLE_URL_LIST_KEYS = frozenset({
    "person_locations",
    "person_titles",
    "person_seniorities",
    "organization_locations",
    "q_organization_domains",
    "contact_email_status",
    "organization_ids",
    "organization_num_employees_ranges",
    "person_not_titles",
    "q_organization_job_titles",
    "organization_latest_funding_stage_cd",
})

# Upper bound applied to per_page (see the Apollo API reference for the limit).
_APOLLO_SEARCH_MAX_PER_PAGE = 100
# Hard cap on the number of items a single call will collect.
_APOLLO_SEARCH_MAX_ITEMS = 2500


def _apollo_payload_from_example_url(example_url: str, max_items: int) -> Dict[str, Any]:
    """Translate the query string of an Apollo search URL into an API payload."""
    parsed_url = urlparse(example_url)
    query_string = parsed_url.query
    # Some URLs carry the query inside the fragment ("#/people?...").
    if not query_string and "?" in parsed_url.fragment:
        query_string = parsed_url.fragment.split("?", 1)[1]
    query_params = parse_qs(query_string)

    def last_int(name: str, default: int) -> int:
        """Last value of ``name`` as an int, or ``default`` if absent/invalid."""
        values = query_params.get(name)
        if not values:
            return default
        try:
            return int(values[-1])
        except ValueError:
            return default

    payload: Dict[str, Any] = {
        "page": last_int("page", 1),
        "per_page": last_int("per_page", min(max_items, _APOLLO_SEARCH_MAX_PER_PAGE)),
    }

    for raw_key, raw_values in query_params.items():
        # Strip off [] if present so we can do a snake_case transform
        stripped_key = raw_key[:-2] if raw_key.endswith("[]") else raw_key

        if raw_key in _APOLLO_EXAMPLE_URL_KEY_MAPPING:
            key = _APOLLO_EXAMPLE_URL_KEY_MAPPING[raw_key]
        elif stripped_key in _APOLLO_EXAMPLE_URL_KEY_MAPPING:
            key = _APOLLO_EXAMPLE_URL_KEY_MAPPING[stripped_key]
        else:
            # fallback: convert camelCase -> snake_case
            key = re.sub(r'(?<!^)(?=[A-Z])', '_', stripped_key).lower()

        # Paging was parsed and validated above. Letting the loop overwrite it
        # would reintroduce raw, possibly non-numeric values.
        if key in ("page", "per_page"):
            continue

        # A single value is unwrapped to a str; several values stay a list.
        value: Union[str, bool, List[str]] = (
            raw_values[0] if len(raw_values) == 1 else raw_values
        )

        # Known booleans
        if key == "sort_ascending":
            value = str(value).lower() in ("true", "1", "yes")

        # Join arrays for q_keywords
        if key == "q_keywords" and isinstance(value, list):
            value = " ".join(value)

        # Anything that came from a `[]` parameter, or is a known array
        # parameter, is sent as a list even when it has one value.
        if isinstance(value, str) and (
            raw_key.endswith("[]") or key in _APOLLO_EXAMPLE_URL_LIST_KEYS
        ):
            value = [value]

        payload[key] = value

    # Remove invalid sort
    if payload.get("sort_by_field") == "[none]":
        payload.pop("sort_by_field")

    return payload


def _apollo_payload_from_query(query: LeadsQueryFilters, max_items: int) -> Dict[str, Any]:
    """Build an Apollo people-search payload from a ``LeadsQueryFilters`` object."""
    payload: Dict[str, Any] = {
        "person_titles": query.person_current_titles or [],
        "person_locations": query.person_locations or [],
        "search_signal_ids": query.filter_by_signals or [],
        "q_keywords": query.search_keywords or "",
        "organization_num_employees_ranges": (
            query.organization_num_employees_ranges
            or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
        ),
        "page": 1,
        "per_page": min(max_items, _APOLLO_SEARCH_MAX_PER_PAGE),
    }
    if query.job_openings_with_titles:
        payload["q_organization_job_titles"] = query.job_openings_with_titles
    if query.latest_funding_stages:
        payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
    if query.sort_by_field is not None:
        payload["sort_by_field"] = query.sort_by_field
    if query.sort_ascending is not None:
        payload["sort_ascending"] = query.sort_ascending
    if query.person_seniorities:
        payload["person_seniorities"] = query.person_seniorities
    return payload


async def search_leads_with_apollo(
    query: LeadsQueryFilters,
    max_items_to_search: Optional[int] = 10,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict]:
    """
    Search Apollo for leads, paging until ``max_items_to_search`` are collected.

    The search payload comes from ``example_url`` when given (its query string
    is translated into API parameters), otherwise from ``query``.

    Args:
        query: Search filters; used only when ``example_url`` is not given.
        max_items_to_search: Maximum number of leads to return. ``None``/``0``
            means 10; values above 2500 are reduced to 2500.
        example_url: Optional Apollo search URL to derive the payload from.
        tool_config: Optional tool configuration used to resolve the Apollo credentials.

    Returns:
        At most ``max_items_to_search`` lead dictionaries. Each holds the
        fields produced by ``fill_in_properties_with_preference`` plus
        ``additional_properties["apollo_person_data"]``, the cleaned raw
        record serialised as JSON.
    """
    logger.info("Entering search_leads_with_apollo")

    max_items = max_items_to_search or 10
    if max_items > _APOLLO_SEARCH_MAX_ITEMS:
        logger.warning("Requested max_items_to_search > 2500, overriding to 2500.")
        max_items = _APOLLO_SEARCH_MAX_ITEMS

    # -----------------------------
    # A/B) Build the search payload
    # -----------------------------
    if example_url:
        logger.debug(f"example_url provided: {example_url}")
        dynamic_payload = _apollo_payload_from_example_url(example_url, max_items)
    else:
        dynamic_payload = _apollo_payload_from_query(query, max_items)

    # -----------------------------
    # C) Fetch multiple pages
    # -----------------------------
    all_people: List[Dict[str, Any]] = []

    current_page = max(int(dynamic_payload.get("page", 1)), 1)
    # Keep per_page within 1..100. A larger request than the API serves would
    # make every full page look "short" and stop pagination after one page.
    per_page = int(dynamic_payload.get("per_page", min(max_items, _APOLLO_SEARCH_MAX_PER_PAGE)))
    per_page = min(max(per_page, 1), _APOLLO_SEARCH_MAX_PER_PAGE)

    while len(all_people) < max_items:
        page_payload = dict(dynamic_payload)
        page_payload["page"] = current_page
        page_payload["per_page"] = per_page

        logger.debug(f"Fetching page {current_page}, per_page {per_page}")
        page_results = await search_people_with_apollo(tool_config=tool_config, dynamic_payload=page_payload)

        if not page_results:
            break

        all_people.extend(page_results)

        # A short page means there is nothing further to fetch.
        if len(page_results) < per_page:
            break

        current_page += 1

    # The last page can overshoot the requested maximum; trim the surplus.
    del all_people[max_items:]

    logger.info(f"Fetched a total of {len(all_people)} items from Apollo (across pages).")

    # -----------------------------------------------
    # Convert raw results -> dictionary objects
    # -----------------------------------------------
    leads: List[Dict[str, Any]] = []
    for person_data in all_people:
        lead = fill_in_properties_with_preference({}, person_data)
        cleaned_person_data = cleanup_properties(person_data)
        lead["additional_properties"] = {
            "apollo_person_data": json.dumps(cleaned_person_data)
        }
        leads.append(lead)

    logger.info(f"Converted {len(leads)} Apollo records into dictionaries.")
    return leads
708
+
709
+
710
async def search_leads_with_apollo_page(
    query: LeadsQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Fetch a single page of Apollo leads using ``page`` and ``per_page``.

    This helper performs one request to the Apollo ``mixed_people/search``
    endpoint and returns the fetched leads along with pagination metadata.
    The search payload is built from ``example_url`` when it is given
    (its query parameters are translated to snake_case payload keys);
    otherwise it is built from the fields of ``query``. In both cases the
    ``page`` and ``per_page`` arguments override any value found elsewhere.

    Args:
        query: LeadsQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1)
        per_page: Number of results per page (defaults to 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
            - current_page: The current page number
            - per_page: Number of results per page
            - total_entries: Total number of results available
            - total_pages: Total number of pages available
            - has_next_page: Boolean indicating if more pages exist
            - next_page: Next page number (None if no more pages)
            - results: List of lead dictionaries for this page
    """
    logger.info("Entering search_leads_with_apollo_page")

    # -----------------------------------
    # A) example_url provided -> build payload from its query parameters
    # -----------------------------------
    if example_url:
        parsed_url = urlparse(example_url)
        query_string = parsed_url.query

        # Fall back to a query string embedded in the URL fragment.
        if not query_string and "?" in parsed_url.fragment:
            query_string = parsed_url.fragment.split("?", 1)[1]

        query_params = parse_qs(query_string)

        dynamic_payload: Dict[str, Any] = {
            "page": page,
            "per_page": per_page,
        }

        # URL parameter name -> payload key, for names that the generic
        # camelCase -> snake_case conversion below would not produce.
        mapping = {
            "personLocations": "person_locations",
            "organizationNumEmployeesRanges": "organization_num_employees_ranges",
            "personTitles": "person_titles",
            "personNotTitles": "person_not_titles",
            "qOrganizationJobTitles": "q_organization_job_titles",
            "sortAscending": "sort_ascending",
            "sortByField": "sort_by_field",
            "contactEmailStatusV2": "contact_email_status",
            "searchSignalIds": "search_signal_ids",
            "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
            "revenueRange[max]": "revenue_range_max",
            "revenueRange[min]": "revenue_range_min",
            "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
            "organizationIndustryTagIds": "organization_industry_tag_ids",
            "notOrganizationIds": "not_organization_ids",
        }

        # Payload keys that are sent as lists even when the URL carried a
        # single value without the "[]" suffix.
        list_valued_keys = (
            "person_locations",
            "person_titles",
            "person_seniorities",
            "organization_locations",
            "q_organization_domains",
            "contact_email_status",
            "organization_ids",
            "organization_num_employees_ranges",
            "person_not_titles",
            "q_organization_job_titles",
            "organization_latest_funding_stage_cd",
        )

        for raw_key, raw_value_list in query_params.items():
            is_array_param = raw_key.endswith("[]")
            key = raw_key[:-2] if is_array_param else raw_key

            if raw_key in mapping:
                key = mapping[raw_key]
            elif key in mapping:
                key = mapping[key]
            else:
                # Generic camelCase -> snake_case conversion.
                key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()

            # parse_qs always yields lists; unwrap single values.
            final_value: Any = raw_value_list[0] if len(raw_value_list) == 1 else raw_value_list

            if key in ("sort_ascending",):
                val_lower = str(final_value).lower()
                final_value = val_lower in ("true", "1", "yes")

            if key in ("page", "per_page"):
                try:
                    final_value = int(final_value)
                except (TypeError, ValueError):
                    # Non-numeric or repeated parameter (a list): keep the
                    # parsed value; it is overridden by the explicit
                    # page/per_page arguments below anyway.
                    pass

            if key == "q_keywords" and isinstance(final_value, list):
                final_value = " ".join(final_value)

            if isinstance(final_value, str) and (is_array_param or key in list_valued_keys):
                final_value = [final_value]

            dynamic_payload[key] = final_value

        # "[none]" is the URL's way of saying "no sort field"; drop it.
        if dynamic_payload.get("sort_by_field") == "[none]":
            dynamic_payload.pop("sort_by_field")

    # -----------------------------------
    # B) No example_url -> build from `query`
    # -----------------------------------
    else:
        dynamic_payload = {
            "person_titles": query.person_current_titles or [],
            "person_locations": query.person_locations or [],
            "search_signal_ids": query.filter_by_signals or [],
            "q_keywords": query.search_keywords or "",
            "organization_num_employees_ranges": (
                query.organization_num_employees_ranges
                or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
            ),
        }
        if query.job_openings_with_titles:
            dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
        if query.latest_funding_stages:
            dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
        if query.sort_by_field is not None:
            dynamic_payload["sort_by_field"] = query.sort_by_field
        if query.sort_ascending is not None:
            dynamic_payload["sort_ascending"] = query.sort_ascending
        if query.q_organization_keyword_tags:
            dynamic_payload["q_organization_keyword_tags"] = query.q_organization_keyword_tags

        if query.q_not_organization_keyword_tags:
            dynamic_payload["q_not_organization_keyword_tags"] = query.q_not_organization_keyword_tags

    # The explicit arguments always win over page/per_page found in the URL.
    page_payload = dict(dynamic_payload)
    page_payload["page"] = page
    page_payload["per_page"] = per_page

    logger.info(
        f"Fetching Apollo page {page} with per_page {per_page}..."
        f" Payload: {json.dumps(page_payload, indent=2)}"
    )

    # Get the full Apollo API response with pagination metadata
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/mixed_people/search"

    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, page_payload)

    if not apollo_response:
        # Same shape as the success result so callers can always read
        # "next_page" without a KeyError.
        return {
            "current_page": page,
            "per_page": per_page,
            "total_entries": 0,
            "total_pages": 0,
            "has_next_page": False,
            "next_page": None,
            "results": [],
        }

    # Extract pagination metadata (tolerate a missing or null section)
    pagination = apollo_response.get("pagination") or {}
    current_page = pagination.get("page", page)
    total_entries = pagination.get("total_entries", 0)
    total_pages = pagination.get("total_pages", 0)
    per_page_actual = pagination.get("per_page", per_page)

    # Determine if there are more pages
    has_next_page = current_page < total_pages

    # Extract people and contacts (either may be absent or null)
    people = apollo_response.get("people") or []
    contacts = apollo_response.get("contacts") or []
    page_results = people + contacts

    leads: List[Dict[str, Any]] = []
    for person_data in page_results:
        # Map the raw record onto lead properties first, then keep a cleaned
        # copy of the raw record as JSON under additional_properties.
        lead_properties = fill_in_properties_with_preference({}, person_data)
        cleaned_person = cleanup_properties(person_data)
        lead_properties["additional_properties"] = {
            "apollo_person_data": json.dumps(cleaned_person)
        }
        leads.append(lead_properties)

    logger.info(f"Converted {len(leads)} Apollo records into dictionaries (single page mode). Page {current_page} of {total_pages}")

    return {
        "current_page": current_page,
        "per_page": per_page_actual,
        "total_entries": total_entries,
        "total_pages": total_pages,
        "has_next_page": has_next_page,
        "next_page": current_page + 1 if has_next_page else None,
        "results": leads
    }
919
+
920
@assistant_tool
async def get_organization_domain_from_apollo(
    organization_id: str,
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, Any]:
    """
    Look up the primary domain of an Apollo organization by its ID.

    Delegates to ``get_organization_details_from_apollo`` and extracts the
    ``primary_domain`` field from the returned details.

    Parameters:
    - organization_id (str): ID of the organization.
    - tool_config (List[Dict], optional): Passed through to the details lookup.

    Returns:
    - dict: ``{'organization_id': ..., 'domain': ...}`` on success, otherwise
      a dict with an ``'error'`` key (including the error dict returned by the
      details lookup, which is passed back unchanged).
    """
    logger.info("Entering get_organization_domain_from_apollo")

    if not organization_id:
        logger.warning("No organization_id provided.")
        return {'error': 'organization_id must be provided'}

    try:
        details = await get_organization_details_from_apollo(organization_id, tool_config=tool_config)

        # Propagate lookup failures as-is.
        if 'error' in details:
            return details

        primary_domain = details.get('primary_domain')
        if not primary_domain:
            logger.warning("Domain not found in the organization details.")
            return {'error': 'Domain not found in the organization details'}

        logger.info("Successfully retrieved domain from Apollo organization details.")
        return {'organization_id': organization_id, 'domain': primary_domain}
    except Exception as exc:
        logger.exception("Exception occurred in get_organization_domain_from_apollo.")
        return {'error': str(exc)}
954
+
955
+
956
@assistant_tool
async def get_organization_details_from_apollo(
    organization_id: str,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch an organization's details from Apollo using the organization ID.

    The HTTP request is made by ``_fetch_organization_details_from_apollo``,
    which retries on HTTP 429 responses. Any exception that survives the
    retries is logged and converted to an error dict, so this function
    always returns a dict.

    Parameters:
    - organization_id (str): ID of the organization.
    - tool_config (List[Dict], optional): Apollo tool configuration used to
      resolve the access token.

    Returns:
    - dict: Organization details or an error message under the 'error' key.
    """
    logger.info("Entering get_organization_details_from_apollo")

    token, is_oauth = get_apollo_access_token(tool_config)
    if not organization_id:
        logger.warning("No organization_id provided.")
        return {'error': "Organization ID must be provided"}

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "Accept": "application/json"
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = f'https://api.apollo.io/api/v1/organizations/{organization_id}'
    logger.debug(f"Making GET request to Apollo for organization ID: {organization_id}")

    try:
        return await _fetch_organization_details_from_apollo(url, headers)
    except Exception as e:
        logger.exception("Exception occurred while fetching organization details from Apollo.")
        return {'error': str(e)}


# The retry decorator lives on this helper rather than on the public function:
# the public function catches every exception to return an error dict, which
# would otherwise swallow the 429 error before backoff could see it.
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=60,
)
async def _fetch_organization_details_from_apollo(
    url: str,
    headers: Dict[str, str],
) -> Dict[str, Any]:
    """Perform one GET against ``url``; raise ClientResponseError on HTTP 429.

    Returns the ``organization`` object from a 200 response, or an error dict
    for an empty 200 payload or any other non-429 status.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            logger.debug(f"Received response status: {response.status}")
            if response.status == 200:
                result = await response.json()
                org_details = result.get('organization', {})
                if org_details:
                    logger.info("Successfully retrieved organization details from Apollo.")
                    return org_details
                logger.warning("Organization details not found in the response.")
                return {'error': 'Organization details not found in the response'}

            if response.status == 429:
                msg = "Rate limit exceeded"
                limit_minute = response.headers.get('x-rate-limit-minute')
                limit_hourly = response.headers.get('x-rate-limit-hourly')
                limit_daily = response.headers.get('x-rate-limit-daily')
                logger.info(f"get_organization_details_from_apollo x-rate-limit-minute: {limit_minute}")
                logger.info(f"get_organization_details_from_apollo x-rate-limit-hourly: {limit_hourly}")
                logger.info(f"get_organization_details_from_apollo x-rate-limit-daily: {limit_daily}")
                logger.warning(msg)
                # Raised so that the backoff decorator can retry the request.
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message=msg,
                    headers=response.headers
                )

            result = await response.json()
            logger.warning(f"get_organization_details_from_apollo error: {result}")
            return {'error': result}
1033
+
1034
+
1035
async def enrich_user_info_with_apollo(
    input_user_properties: Dict[str, Any],
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, Any]:
    """
    Enriches the user info (input_user_properties) with data from Apollo.
    Attempts direct enrichment if LinkedIn URL or email is provided; otherwise,
    performs a name-based search and enriches the first matching result that
    has a LinkedIn URL. Updates the input_user_properties dictionary in place.

    Parameters:
    - input_user_properties (Dict[str, Any]): A dictionary with user details.
    - tool_config (List[Dict], optional): Apollo tool configuration.

    Returns:
    - Dict[str, Any]: Updated input_user_properties with enriched data from Apollo.
      ``found_user_in_apollo`` is set to False when no Apollo data was obtained,
      and to True when the final name check passes. An empty dict is returned
      when input_user_properties is empty.
    """
    logger.info("Entering enrich_user_info_with_apollo")

    if not input_user_properties:
        logger.warning("No input_user_properties provided; returning empty dict.")
        return {}

    linkedin_url = input_user_properties.get("user_linkedin_url", "")
    email = input_user_properties.get("email", "")
    user_data_from_apollo = None

    logger.debug(f"Properties => LinkedIn URL: {linkedin_url}, Email: {email}")

    # If LinkedIn url or email is present, attempt direct enrichment
    if linkedin_url or email:
        try:
            user_data_from_apollo = await enrich_person_info_from_apollo(
                linkedin_url=linkedin_url,
                email=email,
                tool_config=tool_config
            )
        except Exception:
            logger.exception("Exception occurred while enriching person info from Apollo by LinkedIn or email.")
    else:
        # Fallback to name-based lookup. `or ""` guards against keys that are
        # present with a None value, which would otherwise produce a lookup
        # for the literal name "None None".
        first_name = input_user_properties.get("first_name") or ""
        last_name = input_user_properties.get("last_name") or ""
        full_name = (input_user_properties.get("full_name") or f"{first_name} {last_name}").strip()
        company = input_user_properties.get("organization_name") or ""

        if not full_name:
            logger.warning("No full_name or (first_name + last_name) provided.")
            input_user_properties["found_user_in_apollo"] = False
            return input_user_properties

        logger.debug(f"Looking up Apollo by name: {full_name}, company: {company}")
        try:
            search_result = await lookup_person_in_apollo_by_name(
                full_name=full_name,
                company_name=company,
                tool_config=tool_config
            )

            # Extract people and contacts from the search result
            people = search_result.get("people") or []
            contacts = search_result.get("contacts") or []
            results = people + contacts
            logger.info(f"Name-based lookup returned {len(results)} results from Apollo.")

            wanted_full_name = full_name.lower()
            wanted_first_name = first_name.lower()
            wanted_last_name = last_name.lower()
            wanted_company = company.lower()
            # Only compare first/last names when the caller supplied at least
            # one of them; otherwise records with empty names would match.
            can_match_on_parts = bool(first_name or last_name)

            for person in results:
                # Fields may be present with a null value, so normalise
                # None to "" before lowercasing.
                person_name = (person.get("name") or "").lower()
                person_first_name = (person.get("first_name") or "").lower()
                person_last_name = (person.get("last_name") or "").lower()
                person_company = ((person.get("organization") or {}).get("name") or "").lower()

                name_matches = person_name == wanted_full_name or (
                    can_match_on_parts
                    and person_first_name == wanted_first_name
                    and person_last_name == wanted_last_name
                )
                company_matches = not company or person_company == wanted_company

                # Match the full name or first/last name and company
                if name_matches and company_matches:
                    logger.info(f"Found matching person {person.get('name')} in Apollo. Enriching data.")
                    linkedin_url = person.get("linkedin_url", "")
                    if linkedin_url:
                        try:
                            user_data_from_apollo = await enrich_person_info_from_apollo(
                                linkedin_url=linkedin_url,
                                tool_config=tool_config
                            )
                        except Exception:
                            logger.exception("Exception occurred during second stage Apollo enrichment.")
                    if user_data_from_apollo:
                        break
        except Exception:
            logger.exception("Exception occurred while performing name-based lookup in Apollo.")

    if not user_data_from_apollo:
        logger.debug("No user data returned from Apollo.")
        input_user_properties["found_user_in_apollo"] = False
        return input_user_properties

    # At this point, user_data_from_apollo likely has "person" key
    # (tolerate it being absent or null).
    person_data = user_data_from_apollo.get("person") or {}
    additional_props = input_user_properties.get("additional_properties") or {}

    # Fill missing contact info if not already present
    if not input_user_properties.get("email"):
        input_user_properties["email"] = person_data.get("email", "")
    if not input_user_properties.get("phone"):
        input_user_properties["phone"] = (person_data.get("contact", {}) or {}).get("sanitized_phone", "")

    # Map fields: Apollo values overwrite the input whenever they are non-empty
    if person_data.get("name"):
        input_user_properties["full_name"] = person_data["name"]
    if person_data.get("first_name"):
        input_user_properties["first_name"] = person_data["first_name"]
    if person_data.get("last_name"):
        input_user_properties["last_name"] = person_data["last_name"]
    if person_data.get("linkedin_url"):
        input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]

    if person_data.get("organization"):
        org_data = person_data["organization"] or {}
        if org_data.get("primary_domain"):
            input_user_properties["primary_domain_of_organization"] = org_data["primary_domain"]
        if org_data.get("name"):
            input_user_properties["organization_name"] = org_data["name"]
        if org_data.get("linkedin_url"):
            input_user_properties["organization_linkedin_url"] = org_data["linkedin_url"]
        if org_data.get("website_url"):
            input_user_properties["organization_website"] = org_data["website_url"]
        if org_data.get("keywords"):
            input_user_properties["keywords"] = ", ".join(org_data["keywords"])

    if person_data.get("title"):
        input_user_properties["job_title"] = person_data["title"]
    if person_data.get("headline"):
        input_user_properties["headline"] = person_data["headline"]
        # If there's no summary_about_lead, reuse the person's headline
        if not input_user_properties.get("summary_about_lead"):
            input_user_properties["summary_about_lead"] = person_data["headline"]

    # Derive location (avoid literal "None")
    city = person_data.get("city")
    state = person_data.get("state")
    parts = []
    for value in (city, state):
        if value is None:
            continue
        s = str(value).strip()
        if not s or s.lower() == "none":
            continue
        parts.append(s)
    lead_location = ", ".join(parts)
    if lead_location:
        input_user_properties["lead_location"] = lead_location

    # Verify name match
    # NOTE(review): first_name/last_name were already overwritten from
    # person_data above, so this check passes whenever Apollo returned both
    # names. Confirm whether a comparison against the caller's original names
    # was intended before changing it.
    first_matched = bool(
        input_user_properties.get("first_name")
        and person_data.get("first_name") == input_user_properties["first_name"]
    )
    last_matched = bool(
        input_user_properties.get("last_name")
        and person_data.get("last_name") == input_user_properties["last_name"]
    )
    if first_matched and last_matched:
        logger.info("Matching user found and data enriched from Apollo.")
        input_user_properties["found_user_in_apollo"] = True

    # Keep a cleaned copy of the raw Apollo record as JSON for reference
    person_data = cleanup_properties(person_data)
    additional_props["apollo_person_data"] = json.dumps(person_data)
    input_user_properties["additional_properties"] = additional_props

    return input_user_properties
1206
+
1207
+
1208
async def search_companies_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Run a single company search against Apollo's organizations/search endpoint.

    Args:
        tool_config: Apollo API configuration used to resolve the access token
        dynamic_payload: Search parameters sent as the request payload

    Returns:
        The response's "organizations" entries followed by its "accounts"
        entries, or an empty list when no payload is given or no data comes back
    """
    logger.info("Entering search_companies_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    token, is_oauth = get_apollo_access_token(tool_config)
    # OAuth tokens go in a Bearer header; plain API keys use X-Api-Key.
    auth_header = {"Authorization": f"Bearer {token}"} if is_oauth else {"X-Api-Key": token}
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
        **auth_header,
    }

    url = "https://api.apollo.io/api/v1/organizations/search"
    logger.info(f"Sending payload to Apollo organizations endpoint (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        response_data = await fetch_apollo_data(session, url, headers, dynamic_payload)
        if response_data:
            found_organizations = response_data.get("organizations", [])
            # Apollo sometimes returns accounts as well
            found_accounts = response_data.get("accounts", [])
            return found_organizations + found_accounts

        logger.error("No data returned from Apollo organizations search.")
        return []
1250
+
1251
+
1252
def fill_in_company_properties(company_data: dict) -> dict:
    """
    Convert Apollo company/organization data into a standardized format.

    Args:
        company_data: Raw company data from Apollo API

    Returns:
        Dictionary with standardized company properties. The cleaned raw
        record is also stored as a JSON string under
        ``additional_properties["apollo_organization_data"]``.
    """
    company_properties = {}

    # Basic company information
    company_properties["organization_name"] = company_data.get("name", "")
    company_properties["primary_domain"] = company_data.get("primary_domain", "")
    company_properties["website_url"] = company_data.get("website_url", "")
    company_properties["organization_linkedin_url"] = company_data.get("linkedin_url", "")

    # Location information
    company_properties["organization_city"] = company_data.get("city", "")
    company_properties["organization_state"] = company_data.get("state", "")
    company_properties["organization_country"] = company_data.get("country", "")

    # Create a combined location string, skipping missing/empty parts
    location_parts = [
        company_data.get("city", ""),
        company_data.get("state", ""),
        company_data.get("country", "")
    ]
    company_properties["organization_location"] = ", ".join([part for part in location_parts if part])

    # Company size and financial info
    company_properties["employee_count"] = company_data.get("estimated_num_employees", 0)
    company_properties["annual_revenue"] = company_data.get("annual_revenue", 0)

    # Industry and business info
    company_properties["industry"] = company_data.get("industry", "")
    # `or []` covers a "keywords" key that is present but null; the .get()
    # default alone only applies when the key is missing.
    company_properties["keywords"] = ", ".join(company_data.get("keywords") or [])
    company_properties["description"] = company_data.get("description", "")

    # Funding and growth
    company_properties["founded_year"] = company_data.get("founded_year", "")
    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
    company_properties["total_funding"] = company_data.get("total_funding", 0)

    # Technology stack (key is only set when at least one name is present)
    tech_stack = company_data.get("technology_names", [])
    if tech_stack:
        company_properties["technology_stack"] = ", ".join(tech_stack)

    # Apollo-specific IDs
    company_properties["apollo_organization_id"] = company_data.get("id", "")

    # Additional metadata
    company_properties["phone"] = company_data.get("phone", "")
    company_properties["facebook_url"] = company_data.get("facebook_url", "")
    company_properties["twitter_url"] = company_data.get("twitter_url", "")

    # Store raw data for reference
    company_properties["additional_properties"] = {
        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
    }

    return company_properties
1316
+
1317
+
1318
+ @assistant_tool
1319
+ async def search_companies_with_apollo_page(
1320
+ query: CompanyQueryFilters,
1321
+ page: Optional[int] = 1,
1322
+ per_page: Optional[int] = 25,
1323
+ example_url: Optional[str] = None,
1324
+ tool_config: Optional[List[Dict[str, Any]]] = None,
1325
+ ) -> Dict[str, Any]:
1326
+ """
1327
+ Fetch a single page of Apollo companies using ``page`` and ``per_page``.
1328
+
1329
+ This helper performs one request to the Apollo API and returns the fetched
1330
+ companies along with comprehensive pagination metadata.
1331
+
1332
+ Args:
1333
+ query: CompanyQueryFilters object containing search criteria
1334
+ page: Page number to fetch (1-indexed, defaults to 1)
1335
+ per_page: Number of results per page (defaults to 25)
1336
+ example_url: Optional URL to parse search parameters from
1337
+ tool_config: Optional tool configuration for API keys
1338
+
1339
+ Returns:
1340
+ Dict containing:
1341
+ - current_page: The current page number
1342
+ - per_page: Number of results per page
1343
+ - total_entries: Total number of results available
1344
+ - total_pages: Total number of pages available
1345
+ - has_next_page: Boolean indicating if more pages exist
1346
+ - next_page: Next page number (None if no more pages)
1347
+ - results: List of company dictionaries for this page
1348
+ """
1349
+ logger.info("Entering search_companies_with_apollo_page")
1350
+
1351
+ if example_url:
1352
+ parsed_url = urlparse(example_url)
1353
+ query_string = parsed_url.query
1354
+
1355
+ if not query_string and "?" in parsed_url.fragment:
1356
+ fragment_query = parsed_url.fragment.split("?", 1)[1]
1357
+ query_string = fragment_query
1358
+
1359
+ query_params = parse_qs(query_string)
1360
+
1361
+ dynamic_payload: Dict[str, Any] = {
1362
+ "page": page,
1363
+ "per_page": per_page,
1364
+ }
1365
+
1366
+ # Organization-specific URL parameter mapping
1367
+ mapping = {
1368
+ "organizationLocations": "organization_locations",
1369
+ "organizationNumEmployeesRanges": "organization_num_employees_ranges",
1370
+ "organizationIndustries": "organization_industries",
1371
+ "organizationIndustryTagIds": "organization_industry_tag_ids",
1372
+ "qKeywords": "q_keywords",
1373
+ "qOrganizationDomains": "q_organization_domains",
1374
+ "sortAscending": "sort_ascending",
1375
+ "sortByField": "sort_by_field",
1376
+ "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
1377
+ "revenueRange[max]": "revenue_range_max",
1378
+ "revenueRange[min]": "revenue_range_min",
1379
+ "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
1380
+ "organizationIds": "organization_ids",
1381
+ "notOrganizationIds": "not_organization_ids",
1382
+ "qOrganizationSearchListId": "q_organization_search_list_id",
1383
+ "qNotOrganizationSearchListId": "q_not_organization_search_list_id",
1384
+ }
1385
+
1386
+ for raw_key, raw_value_list in query_params.items():
1387
+ if raw_key.endswith("[]"):
1388
+ key = raw_key[:-2]
1389
+ else:
1390
+ key = raw_key
1391
+
1392
+ if raw_key in mapping:
1393
+ key = mapping[raw_key]
1394
+ elif key in mapping:
1395
+ key = mapping[key]
1396
+ else:
1397
+ key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()
1398
+
1399
+ if len(raw_value_list) == 1:
1400
+ final_value: Union[str, List[str]] = raw_value_list[0]
1401
+ else:
1402
+ final_value = raw_value_list
1403
+
1404
+ if key in ("sort_ascending",):
1405
+ val_lower = str(final_value).lower()
1406
+ final_value = val_lower in ("true", "1", "yes")
1407
+
1408
+ if key in ("page", "per_page", "revenue_range_min", "revenue_range_max"):
1409
+ try:
1410
+ final_value = int(final_value)
1411
+ except ValueError:
1412
+ pass
1413
+
1414
+ if key == "q_organization_keyword_tags":
1415
+ # Handle both string and list inputs, split by comma if string
1416
+ if isinstance(final_value, str):
1417
+ # Split by comma and strip whitespace
1418
+ final_value = [tag.strip() for tag in final_value.split(",") if tag.strip()]
1419
+ elif isinstance(final_value, list):
1420
+ # If it's already a list, flatten any comma-separated items
1421
+ flattened = []
1422
+ for item in final_value:
1423
+ if isinstance(item, str) and "," in item:
1424
+ flattened.extend([tag.strip() for tag in item.split(",") if tag.strip()])
1425
+ else:
1426
+ flattened.append(item)
1427
+ final_value = flattened
1428
+
1429
+ if raw_key.endswith("[]"):
1430
+ if isinstance(final_value, str):
1431
+ final_value = [final_value]
1432
+ else:
1433
+ if key in (
1434
+ "organization_locations",
1435
+ "organization_industries",
1436
+ "organization_industry_tag_ids",
1437
+ "q_organization_domains",
1438
+ "q_organization_keyword_tags",
1439
+ "organization_ids",
1440
+ "not_organization_ids",
1441
+ "organization_num_employees_ranges",
1442
+ "currently_using_any_of_technology_uids",
1443
+ "organization_latest_funding_stage_cd",
1444
+ ):
1445
+ if isinstance(final_value, str):
1446
+ final_value = [final_value]
1447
+
1448
+ dynamic_payload[key] = final_value
1449
+
1450
+ if dynamic_payload.get("sort_by_field") == "[none]":
1451
+ dynamic_payload.pop("sort_by_field")
1452
+
1453
+ # -----------------------------------
1454
+ # B) No example_url -> build from `query`
1455
+ # -----------------------------------
1456
+ else:
1457
+ dynamic_payload = {}
1458
+
1459
+ # Only add fields if they have values (Apollo doesn't like empty arrays)
1460
+ if query.organization_locations:
1461
+ dynamic_payload["organization_locations"] = query.organization_locations
1462
+ if query.organization_industries:
1463
+ dynamic_payload["organization_industries"] = query.organization_industries
1464
+ if query.organization_industry_tag_ids:
1465
+ dynamic_payload["organization_industry_tag_ids"] = query.organization_industry_tag_ids
1466
+
1467
+ # Handle employee ranges
1468
+ employee_ranges = []
1469
+ if query.organization_num_employees_ranges:
1470
+ employee_ranges = query.organization_num_employees_ranges
1471
+ elif query.min_employees or query.max_employees:
1472
+ employee_ranges = [f"{query.min_employees or 1},{query.max_employees or 1000}"]
1473
+
1474
+ if employee_ranges:
1475
+ dynamic_payload["organization_num_employees_ranges"] = employee_ranges
1476
+
1477
+ # Add optional parameters only if they have values
1478
+ if query.q_keywords:
1479
+ # Split comma-separated keywords into an array for company search
1480
+ if isinstance(query.q_keywords, str):
1481
+ keyword_tags = [tag.strip() for tag in query.q_keywords.split(",") if tag.strip()]
1482
+ else:
1483
+ keyword_tags = query.q_keywords
1484
+
1485
+ if query.q_organization_keyword_tags:
1486
+ dynamic_payload["q_organization_keyword_tags"] = keyword_tags
1487
+
1488
+ if query.q_not_organization_keyword_tags:
1489
+ dynamic_payload["q_not_organization_keyword_tags"] = query.q_not_organization_keyword_tags
1490
+
1491
+ if query.q_organization_domains:
1492
+ dynamic_payload["q_organization_domains"] = query.q_organization_domains
1493
+ if query.revenue_range_min is not None:
1494
+ dynamic_payload["revenue_range_min"] = query.revenue_range_min
1495
+ if query.revenue_range_max is not None:
1496
+ dynamic_payload["revenue_range_max"] = query.revenue_range_max
1497
+ if query.organization_latest_funding_stage_cd:
1498
+ dynamic_payload["organization_latest_funding_stage_cd"] = query.organization_latest_funding_stage_cd
1499
+ if query.currently_using_any_of_technology_uids:
1500
+ dynamic_payload["currently_using_any_of_technology_uids"] = query.currently_using_any_of_technology_uids
1501
+ if query.organization_ids:
1502
+ dynamic_payload["organization_ids"] = query.organization_ids
1503
+ if query.not_organization_ids:
1504
+ dynamic_payload["not_organization_ids"] = query.not_organization_ids
1505
+ if query.q_organization_search_list_id:
1506
+ dynamic_payload["q_organization_search_list_id"] = query.q_organization_search_list_id
1507
+ if query.q_not_organization_search_list_id:
1508
+ dynamic_payload["q_not_organization_search_list_id"] = query.q_not_organization_search_list_id
1509
+ if query.sort_by_field is not None:
1510
+ dynamic_payload["sort_by_field"] = query.sort_by_field
1511
+ if query.sort_ascending is not None:
1512
+ dynamic_payload["sort_ascending"] = query.sort_ascending
1513
+
1514
+ # Remove sorting parameters that may not be supported by organizations endpoint
1515
+ if "sort_by_field" in dynamic_payload:
1516
+ dynamic_payload.pop("sort_by_field")
1517
+ if "sort_ascending" in dynamic_payload:
1518
+ dynamic_payload.pop("sort_ascending")
1519
+
1520
+ page_payload = dict(dynamic_payload)
1521
+ page_payload["page"] = page
1522
+ page_payload["per_page"] = per_page
1523
+
1524
+ # Clean up the payload - remove empty arrays and None values that Apollo doesn't like
1525
+ cleaned_payload = {}
1526
+ for key, value in page_payload.items():
1527
+ if value is not None:
1528
+ if isinstance(value, list):
1529
+ # Only include non-empty lists
1530
+ if value:
1531
+ cleaned_payload[key] = value
1532
+ else:
1533
+ cleaned_payload[key] = value
1534
+
1535
+ # Ensure page and per_page are always included
1536
+ cleaned_payload["page"] = page
1537
+ cleaned_payload["per_page"] = per_page
1538
+
1539
+ print(f"Fetching Apollo companies page {page} with per_page {per_page}..."
1540
+ f" Payload: {json.dumps(cleaned_payload, indent=2)}")
1541
+
1542
+ # Get the full Apollo API response with pagination metadata
1543
+ token, is_oauth = get_apollo_access_token(tool_config)
1544
+ headers = {
1545
+ "Cache-Control": "no-cache",
1546
+ "Content-Type": "application/json",
1547
+ }
1548
+ if is_oauth:
1549
+ headers["Authorization"] = f"Bearer {token}"
1550
+ else:
1551
+ headers["X-Api-Key"] = token
1552
+
1553
+ url = "https://api.apollo.io/api/v1/organizations/search"
1554
+
1555
+ async with aiohttp.ClientSession() as session:
1556
+ apollo_response = await fetch_apollo_data(session, url, headers, cleaned_payload)
1557
+ if not apollo_response:
1558
+ return {
1559
+ "current_page": page,
1560
+ "per_page": per_page,
1561
+ "total_entries": 0,
1562
+ "total_pages": 0,
1563
+ "has_next_page": False,
1564
+ "results": []
1565
+ }
1566
+
1567
+ # Extract pagination metadata
1568
+ pagination = apollo_response.get("pagination", {})
1569
+ current_page = pagination.get("page", page)
1570
+ total_entries = pagination.get("total_entries", 0)
1571
+ total_pages = pagination.get("total_pages", 0)
1572
+ per_page_actual = pagination.get("per_page", per_page)
1573
+
1574
+ # Determine if there are more pages
1575
+ has_next_page = current_page < total_pages
1576
+
1577
+ # Extract organizations and accounts
1578
+ organizations = apollo_response.get("organizations", [])
1579
+ accounts = apollo_response.get("accounts", [])
1580
+ page_results = organizations + accounts
1581
+
1582
+ companies: List[Dict[str, Any]] = []
1583
+ for company_data in page_results:
1584
+ company_properties = fill_in_company_properties(company_data)
1585
+ companies.append(company_properties)
1586
+
1587
+ logger.info(f"Converted {len(companies)} Apollo company records into standardized dictionaries (single page mode). Page {current_page} of {total_pages}")
1588
+
1589
+ return {
1590
+ "current_page": current_page,
1591
+ "per_page": per_page_actual,
1592
+ "total_entries": total_entries,
1593
+ "total_pages": total_pages,
1594
+ "has_next_page": has_next_page,
1595
+ "next_page": current_page + 1 if has_next_page else None,
1596
+ "results": companies
1597
+ }