dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. dhisana/schemas/common.py +33 -0
  2. dhisana/schemas/sales.py +224 -23
  3. dhisana/utils/add_mapping.py +72 -63
  4. dhisana/utils/apollo_tools.py +739 -109
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/cache_output_tools.py +23 -23
  7. dhisana/utils/check_email_validity_tools.py +456 -458
  8. dhisana/utils/check_for_intent_signal.py +1 -2
  9. dhisana/utils/check_linkedin_url_validity.py +34 -8
  10. dhisana/utils/clay_tools.py +3 -2
  11. dhisana/utils/clean_properties.py +3 -1
  12. dhisana/utils/compose_salesnav_query.py +0 -1
  13. dhisana/utils/compose_search_query.py +7 -3
  14. dhisana/utils/composite_tools.py +0 -1
  15. dhisana/utils/dataframe_tools.py +2 -2
  16. dhisana/utils/email_body_utils.py +72 -0
  17. dhisana/utils/email_provider.py +375 -0
  18. dhisana/utils/enrich_lead_information.py +585 -85
  19. dhisana/utils/fetch_openai_config.py +129 -0
  20. dhisana/utils/field_validators.py +1 -1
  21. dhisana/utils/g2_tools.py +0 -1
  22. dhisana/utils/generate_content.py +0 -1
  23. dhisana/utils/generate_email.py +69 -16
  24. dhisana/utils/generate_email_response.py +298 -41
  25. dhisana/utils/generate_flow.py +0 -1
  26. dhisana/utils/generate_linkedin_connect_message.py +19 -6
  27. dhisana/utils/generate_linkedin_response_message.py +156 -65
  28. dhisana/utils/generate_structured_output_internal.py +351 -131
  29. dhisana/utils/google_custom_search.py +150 -44
  30. dhisana/utils/google_oauth_tools.py +721 -0
  31. dhisana/utils/google_workspace_tools.py +391 -25
  32. dhisana/utils/hubspot_clearbit.py +3 -1
  33. dhisana/utils/hubspot_crm_tools.py +771 -167
  34. dhisana/utils/instantly_tools.py +3 -1
  35. dhisana/utils/lusha_tools.py +10 -7
  36. dhisana/utils/mailgun_tools.py +150 -0
  37. dhisana/utils/microsoft365_tools.py +447 -0
  38. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  39. dhisana/utils/openai_helpers.py +19 -16
  40. dhisana/utils/parse_linkedin_messages_txt.py +2 -3
  41. dhisana/utils/profile.py +37 -0
  42. dhisana/utils/proxy_curl_tools.py +507 -206
  43. dhisana/utils/proxycurl_search_leads.py +426 -0
  44. dhisana/utils/research_lead.py +121 -68
  45. dhisana/utils/sales_navigator_crawler.py +1 -6
  46. dhisana/utils/salesforce_crm_tools.py +323 -50
  47. dhisana/utils/search_router.py +131 -0
  48. dhisana/utils/search_router_jobs.py +51 -0
  49. dhisana/utils/sendgrid_tools.py +126 -91
  50. dhisana/utils/serarch_router_local_business.py +75 -0
  51. dhisana/utils/serpapi_additional_tools.py +290 -0
  52. dhisana/utils/serpapi_google_jobs.py +117 -0
  53. dhisana/utils/serpapi_google_search.py +188 -0
  54. dhisana/utils/serpapi_local_business_search.py +129 -0
  55. dhisana/utils/serpapi_search_tools.py +363 -432
  56. dhisana/utils/serperdev_google_jobs.py +125 -0
  57. dhisana/utils/serperdev_local_business.py +154 -0
  58. dhisana/utils/serperdev_search.py +233 -0
  59. dhisana/utils/smtp_email_tools.py +576 -0
  60. dhisana/utils/test_connect.py +1765 -92
  61. dhisana/utils/trasform_json.py +95 -16
  62. dhisana/utils/web_download_parse_tools.py +0 -1
  63. dhisana/utils/zoominfo_tools.py +2 -3
  64. dhisana/workflow/test.py +1 -1
  65. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
  66. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  67. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  68. dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
  69. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  70. {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,15 @@
1
1
  import asyncio
2
- import hashlib
3
2
  import json
4
3
  import logging
5
4
  import os
6
5
  import re
7
6
  import aiohttp
8
7
  import backoff
9
- from datetime import datetime, timedelta
10
8
 
11
- from pydantic import BaseModel
12
- from dhisana.schemas.sales import LeadsQueryFilters, SmartList, SmartListLead
13
- from dhisana.utils.cache_output_tools import cache_output, retrieve_output
9
+ from dhisana.schemas.sales import LeadsQueryFilters, CompanyQueryFilters
14
10
  from dhisana.utils.assistant_tool_tag import assistant_tool
15
11
  from urllib.parse import urlparse, parse_qs
16
- from typing import Any, Dict, List, Optional, Union
12
+ from typing import Any, Dict, List, Optional, Tuple, Union
17
13
 
18
14
  from dhisana.utils.clean_properties import cleanup_properties
19
15
 
@@ -21,48 +17,81 @@ logging.basicConfig(level=logging.INFO)
21
17
  logger = logging.getLogger(__name__)
22
18
 
23
19
 
24
- def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> str:
20
+ def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> Tuple[str, bool]:
25
21
  """
26
- Retrieves the APOLLO_API_KEY access token from the provided tool configuration.
22
+ Retrieves an Apollo access token from tool configuration or environment variables.
27
23
 
28
24
  Args:
29
- tool_config (list): A list of dictionaries containing the tool configuration.
30
- Each dictionary should have a "name" key and a "configuration" key,
31
- where "configuration" is a list of dictionaries containing "name" and "value" keys.
25
+ tool_config (list): Optional tool configuration payload provided to the tool.
32
26
 
33
27
  Returns:
34
- str: The APOLLO_API_KEY access token.
28
+ Tuple[str, bool]: A tuple containing the token string and a boolean flag indicating
29
+ whether the token represents an OAuth bearer token (``True``) or an API key (``False``).
35
30
 
36
31
  Raises:
37
- ValueError: If the access token is not found in the tool configuration or environment variable.
32
+ ValueError: If the Apollo integration has not been configured.
38
33
  """
39
- APOLLO_API_KEY = None
34
+ token: Optional[str] = None
35
+ is_oauth = False
40
36
 
41
37
  if tool_config:
42
- logger.debug(f"Tool config provided: {tool_config}")
43
38
  apollo_config = next(
44
39
  (item for item in tool_config if item.get("name") == "apollo"), None
45
40
  )
46
41
  if apollo_config:
47
42
  config_map = {
48
- item["name"]: item["value"]
43
+ item["name"]: item.get("value")
49
44
  for item in apollo_config.get("configuration", [])
50
45
  if item
51
46
  }
52
- APOLLO_API_KEY = config_map.get("apiKey")
47
+
48
+ raw_oauth = config_map.get("oauth_tokens")
49
+ if isinstance(raw_oauth, str):
50
+ try:
51
+ raw_oauth = json.loads(raw_oauth)
52
+ except Exception:
53
+ raw_oauth = None
54
+ if isinstance(raw_oauth, dict):
55
+ token = (
56
+ raw_oauth.get("access_token")
57
+ or raw_oauth.get("token")
58
+ )
59
+ if token:
60
+ is_oauth = True
61
+
62
+ if not token:
63
+ direct_access_token = config_map.get("access_token")
64
+ if direct_access_token:
65
+ token = direct_access_token
66
+ is_oauth = True
67
+
68
+ if not token:
69
+ api_key = config_map.get("apiKey") or config_map.get("api_key")
70
+ if api_key:
71
+ token = api_key
72
+ is_oauth = False
53
73
  else:
54
74
  logger.warning("No 'apollo' config item found in tool_config.")
55
- else:
56
- logger.debug("No tool_config provided or it's None.")
57
75
 
58
- # Check environment variable if no key found yet
59
- APOLLO_API_KEY = APOLLO_API_KEY or os.getenv("APOLLO_API_KEY")
60
-
61
- if not APOLLO_API_KEY:
62
- logger.error("APOLLO_API_KEY not found in configuration or environment.")
63
- raise ValueError("APOLLO_API_KEY access token not found in tool_config or environment variable")
76
+ if not token:
77
+ env_oauth_token = os.getenv("APOLLO_ACCESS_TOKEN")
78
+ if env_oauth_token:
79
+ token = env_oauth_token
80
+ is_oauth = True
81
+
82
+ if not token:
83
+ env_api_key = os.getenv("APOLLO_API_KEY")
84
+ if env_api_key:
85
+ token = env_api_key
86
+ is_oauth = False
87
+
88
+ if not token:
89
+ logger.error("Apollo integration is not configured.")
90
+ raise ValueError(
91
+ "Apollo integration is not configured. Please configure the connection to Apollo in Integrations."
92
+ )
64
93
 
65
- return APOLLO_API_KEY
94
+ return token, is_oauth
66
95
 
67
96
 
68
97
  @assistant_tool
@@ -77,6 +106,7 @@ async def enrich_person_info_from_apollo(
77
106
  linkedin_url: Optional[str] = None,
78
107
  email: Optional[str] = None,
79
108
  phone: Optional[str] = None,
109
+ fetch_valid_phone_number: Optional[bool] = False,
80
110
  tool_config: Optional[List[Dict]] = None,
81
111
  ) -> Dict[str, Any]:
82
112
  """
@@ -86,37 +116,40 @@ async def enrich_person_info_from_apollo(
86
116
  - **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
87
117
  - **email** (*str*, optional): Email address of the person.
88
118
  - **phone** (*str*, optional): Phone number of the person.
119
+ - **fetch_valid_phone_number** (*bool*, optional): If True, include phone numbers in the API response. Defaults to False.
89
120
 
90
121
  Returns:
91
122
  - **dict**: JSON response containing person information.
92
123
  """
93
124
  logger.info("Entering enrich_person_info_from_apollo")
94
125
 
95
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
126
+ token, is_oauth = get_apollo_access_token(tool_config)
96
127
 
97
128
  if not linkedin_url and not email and not phone:
98
129
  logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
99
130
  return {'error': "At least one of linkedin_url, email, or phone must be provided"}
100
131
 
101
- headers = {
102
- "X-Api-Key": f"{APOLLO_API_KEY}",
103
- "Content-Type": "application/json"
104
- }
132
+ headers = {"Content-Type": "application/json"}
133
+ if is_oauth:
134
+ headers["Authorization"] = f"Bearer {token}"
135
+ else:
136
+ headers["X-Api-Key"] = token
105
137
 
106
138
  data = {}
107
139
  if linkedin_url:
108
140
  logger.debug(f"LinkedIn URL provided: {linkedin_url}")
109
141
  data['linkedin_url'] = linkedin_url
110
- cached_response = retrieve_output("enrich_person_info_from_apollo", linkedin_url)
111
- if cached_response is not None:
112
- logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
113
- return cached_response
114
142
  if email:
115
143
  logger.debug(f"Email provided: {email}")
116
144
  data['email'] = email
117
145
  if phone:
118
146
  logger.debug(f"Phone provided: {phone}")
119
147
  data['phone_numbers'] = [phone] # Apollo expects a list for phone numbers
148
+
149
+ # Add reveal_phone_number parameter if fetch_valid_phone_number is True
150
+ if fetch_valid_phone_number:
151
+ logger.debug("fetch_valid_phone_number flag is True, including phone numbers in API response")
152
+ data['reveal_phone_number'] = True
120
153
 
121
154
  url = 'https://api.apollo.io/api/v1/people/match'
122
155
 
@@ -126,8 +159,6 @@ async def enrich_person_info_from_apollo(
126
159
  logger.debug(f"Received response status: {response.status}")
127
160
  if response.status == 200:
128
161
  result = await response.json()
129
- if linkedin_url:
130
- cache_output("enrich_person_info_from_apollo", linkedin_url, result)
131
162
  logger.info("Successfully retrieved person info from Apollo.")
132
163
  return result
133
164
  elif response.status == 429:
@@ -179,11 +210,12 @@ async def lookup_person_in_apollo_by_name(
179
210
  logger.warning("No full_name provided.")
180
211
  return {'error': "Full name is required"}
181
212
 
182
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
183
- headers = {
184
- "X-Api-Key": f"{APOLLO_API_KEY}",
185
- "Content-Type": "application/json"
186
- }
213
+ token, is_oauth = get_apollo_access_token(tool_config)
214
+ headers = {"Content-Type": "application/json"}
215
+ if is_oauth:
216
+ headers["Authorization"] = f"Bearer {token}"
217
+ else:
218
+ headers["X-Api-Key"] = token
187
219
 
188
220
  # Construct the query payload
189
221
  data = {
@@ -222,7 +254,6 @@ async def lookup_person_in_apollo_by_name(
222
254
  logger.exception("Exception occurred while looking up person by name.")
223
255
  return {'error': str(e)}
224
256
 
225
-
226
257
  @assistant_tool
227
258
  @backoff.on_exception(
228
259
  backoff.expo,
@@ -246,23 +277,21 @@ async def enrich_organization_info_from_apollo(
246
277
  """
247
278
  logger.info("Entering enrich_organization_info_from_apollo")
248
279
 
249
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
280
+ token, is_oauth = get_apollo_access_token(tool_config)
250
281
 
251
282
  if not organization_domain:
252
283
  logger.warning("No organization domain provided.")
253
284
  return {'error': "organization domain must be provided"}
254
285
 
255
286
  headers = {
256
- "X-Api-Key": f"{APOLLO_API_KEY}",
257
287
  "Content-Type": "application/json",
258
288
  "Cache-Control": "no-cache",
259
289
  "accept": "application/json"
260
290
  }
261
-
262
- cached_response = retrieve_output("enrich_organization_info_from_apollo", organization_domain)
263
- if cached_response is not None:
264
- logger.info(f"Cache hit for organization domain: {organization_domain}")
265
- return cached_response
291
+ if is_oauth:
292
+ headers["Authorization"] = f"Bearer {token}"
293
+ else:
294
+ headers["X-Api-Key"] = token
266
295
 
267
296
  url = f'https://api.apollo.io/api/v1/organizations/enrich?domain={organization_domain}'
268
297
  logger.debug(f"Making GET request to Apollo for organization domain: {organization_domain}")
@@ -273,7 +302,6 @@ async def enrich_organization_info_from_apollo(
273
302
  logger.debug(f"Received response status: {response.status}")
274
303
  if response.status == 200:
275
304
  result = await response.json()
276
- cache_output("enrich_organization_info_from_apollo", organization_domain, result)
277
305
  logger.info("Successfully retrieved organization info from Apollo.")
278
306
  return result
279
307
  elif response.status == 429:
@@ -305,22 +333,12 @@ async def enrich_organization_info_from_apollo(
305
333
  )
306
334
  async def fetch_apollo_data(session, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
307
335
  logger.info("Entering fetch_apollo_data")
308
- key_data = f"{url}_{json.dumps(payload, sort_keys=True)}"
309
- key_hash = hashlib.sha256(key_data.encode()).hexdigest()
310
- logger.debug(f"Cache key hash: {key_hash}")
311
-
312
- cached_response = retrieve_output("fetch_apollo_data", key_hash)
313
- if cached_response is not None:
314
- logger.info("Cache hit for fetch_apollo_data.")
315
- return cached_response
316
-
317
- logger.debug("No cache hit. Making POST request to Apollo.")
336
+ logger.debug("Making POST request to Apollo.")
318
337
  async with session.post(url, headers=headers, json=payload) as response:
319
338
  logger.debug(f"Received response status: {response.status}")
320
339
  if response.status == 200:
321
340
  result = await response.json()
322
- cache_output("fetch_apollo_data", key_hash, result)
323
- logger.info("Successfully fetched data from Apollo and cached it.")
341
+ logger.info("Successfully fetched data from Apollo.")
324
342
  return result
325
343
  elif response.status == 429:
326
344
  msg = "Rate limit exceeded"
@@ -347,12 +365,15 @@ async def search_people_with_apollo(
347
365
  logger.warning("No payload given; returning empty result.")
348
366
  return []
349
367
 
350
- api_key = get_apollo_access_token(tool_config)
368
+ token, is_oauth = get_apollo_access_token(tool_config)
351
369
  headers = {
352
370
  "Cache-Control": "no-cache",
353
371
  "Content-Type": "application/json",
354
- "X-Api-Key": api_key,
355
372
  }
373
+ if is_oauth:
374
+ headers["Authorization"] = f"Bearer {token}"
375
+ else:
376
+ headers["X-Api-Key"] = token
356
377
 
357
378
  url = "https://api.apollo.io/api/v1/mixed_people/search"
358
379
  logger.info(f"Sending payload to Apollo (single page): {json.dumps(dynamic_payload, indent=2)}")
@@ -378,16 +399,6 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
378
399
  """Returns True if the value is None, empty string, or only whitespace."""
379
400
  return value is None or (isinstance(value, str) and not value.strip())
380
401
 
381
- # Email
382
- if is_empty(input_user_properties.get("email")):
383
- input_user_properties["email"] = person_data.get("email", "")
384
-
385
- # Phone
386
- if is_empty(input_user_properties.get("phone")):
387
- # person_data["contact"] might not be defined, so we chain get calls
388
- input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
389
- .get("sanitized_phone", ""))
390
-
391
402
  # Full name
392
403
  # Because `person_data.get("name")` has precedence over input_user_properties,
393
404
  # we only update it if input_user_properties is empty/None for "full_name".
@@ -402,6 +413,16 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
402
413
  if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
403
414
  input_user_properties["last_name"] = person_data["last_name"]
404
415
 
416
+ # Email
417
+ if is_empty(input_user_properties.get("email")):
418
+ input_user_properties["email"] = person_data.get("email", "")
419
+
420
+ # Phone
421
+ if is_empty(input_user_properties.get("phone")):
422
+ # person_data["contact"] might not be defined, so we chain get calls
423
+ input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
424
+ .get("sanitized_phone", ""))
425
+
405
426
  # LinkedIn URL
406
427
  if is_empty(input_user_properties.get("user_linkedin_url")) and person_data.get("linkedin_url"):
407
428
  input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]
@@ -441,11 +462,19 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
441
462
  if is_empty(input_user_properties.get("summary_about_lead")) and person_data.get("headline"):
442
463
  input_user_properties["summary_about_lead"] = person_data["headline"]
443
464
 
444
- # City/State -> lead_location
445
- city = person_data.get("city", "")
446
- state = person_data.get("state", "")
447
- if is_empty(input_user_properties.get("lead_location")) and (city or state):
448
- lead_location = f"{city}, {state}".strip(", ")
465
+ # City/State -> lead_location (avoid literal "None")
466
+ city = person_data.get("city")
467
+ state = person_data.get("state")
468
+ parts = []
469
+ for value in (city, state):
470
+ if value is None:
471
+ continue
472
+ s = str(value).strip()
473
+ if not s or s.lower() == "none":
474
+ continue
475
+ parts.append(s)
476
+ lead_location = ", ".join(parts) if parts else None
477
+ if is_empty(input_user_properties.get("lead_location")) and lead_location:
449
478
  input_user_properties["lead_location"] = lead_location
450
479
 
451
480
  # Filter out placeholder emails
@@ -457,13 +486,13 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
457
486
 
458
487
  async def search_leads_with_apollo(
459
488
  query: LeadsQueryFilters,
460
- request: SmartList,
489
+ max_items_to_search: Optional[int] = 10,
461
490
  example_url: Optional[str] = None,
462
491
  tool_config: Optional[List[Dict[str, Any]]] = None,
463
- ) -> List[SmartListLead]:
492
+ ) -> List[Dict]:
464
493
  logger.info("Entering search_leads_with_apollo")
465
494
 
466
- max_items = request.max_items_to_search or 10
495
+ max_items = max_items_to_search or 10
467
496
  if max_items > 2500:
468
497
  logger.warning("Requested max_items_to_search > 2000, overriding to 2000.")
469
498
  max_items = 2500
@@ -509,7 +538,7 @@ async def search_leads_with_apollo(
509
538
  # Important: handle personNotTitles as well
510
539
  "personNotTitles": "person_not_titles",
511
540
 
512
- "qOrganizationJobTitles": "q_keywords",
541
+ "qOrganizationJobTitles": "q_organization_job_titles",
513
542
  "sortAscending": "sort_ascending",
514
543
  "sortByField": "sort_by_field",
515
544
  "contactEmailStatusV2": "contact_email_status",
@@ -582,6 +611,8 @@ async def search_leads_with_apollo(
582
611
  "organization_ids",
583
612
  "organization_num_employees_ranges",
584
613
  "person_not_titles", # <--- added so single item is forced into list
614
+ "q_organization_job_titles",
615
+ "organization_latest_funding_stage_cd",
585
616
  ):
586
617
  if isinstance(final_value, str):
587
618
  final_value = [final_value]
@@ -602,7 +633,8 @@ async def search_leads_with_apollo(
602
633
  dynamic_payload = {
603
634
  "person_titles": query.person_current_titles or [],
604
635
  "person_locations": query.person_locations or [],
605
- "search_signal_ids": query.search_signal_ids or query.filter_by_signals or [],
636
+ "search_signal_ids": query.filter_by_signals or [],
637
+ "q_keywords": query.search_keywords or "",
606
638
  "organization_num_employees_ranges": (
607
639
  query.organization_num_employees_ranges
608
640
  or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
@@ -610,6 +642,10 @@ async def search_leads_with_apollo(
610
642
  "page": 1,
611
643
  "per_page": min(max_items, 100),
612
644
  }
645
+ if query.job_openings_with_titles:
646
+ dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
647
+ if query.latest_funding_stages:
648
+ dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
613
649
  if query.sort_by_field is not None:
614
650
  dynamic_payload["sort_by_field"] = query.sort_by_field
615
651
  if query.sort_ascending is not None:
@@ -647,9 +683,9 @@ async def search_leads_with_apollo(
647
683
  logger.info(f"Fetched a total of {len(all_people)} items from Apollo (across pages).")
648
684
 
649
685
  # -----------------------------------------------
650
- # Convert raw results -> SmartListLead objects
686
+ # Convert raw results -> dictionary objects
651
687
  # -----------------------------------------------
652
- leads: List[SmartListLead] = []
688
+ leads: List[Dict[str, Any]] = []
653
689
  for user_data_from_apollo in all_people:
654
690
  person_data = user_data_from_apollo
655
691
 
@@ -663,15 +699,217 @@ async def search_leads_with_apollo(
663
699
  additional_props["apollo_person_data"] = json.dumps(person_data)
664
700
  input_user_properties["additional_properties"] = additional_props
665
701
 
666
- lead = SmartListLead(**input_user_properties)
667
- lead.agent_instance_id = request.agent_instance_id
668
- lead.smart_list_id = request.id
669
- lead.organization_id = request.organization_id
670
- leads.append(lead)
702
+ leads.append(input_user_properties)
671
703
 
672
- logger.info(f"Converted {len(leads)} Apollo records into SmartListLead objects.")
704
+ logger.info(f"Converted {len(leads)} Apollo records into dictionaries.")
673
705
  return leads
674
706
 
707
+
708
+ async def search_leads_with_apollo_page(
709
+ query: LeadsQueryFilters,
710
+ page: Optional[int] = 1,
711
+ per_page: Optional[int] = 25,
712
+ example_url: Optional[str] = None,
713
+ tool_config: Optional[List[Dict[str, Any]]] = None,
714
+ ) -> Dict[str, Any]:
715
+ """Fetch a single page of Apollo leads using ``page`` and ``per_page``.
716
+
717
+ This helper performs one request to the Apollo API and returns the fetched
718
+ leads along with comprehensive pagination metadata.
719
+
720
+ Args:
721
+ query: LeadsQueryFilters object containing search criteria
722
+ page: Page number to fetch (1-indexed, defaults to 1)
723
+ per_page: Number of results per page (defaults to 25)
724
+ example_url: Optional URL to parse search parameters from
725
+ tool_config: Optional tool configuration for API keys
726
+
727
+ Returns:
728
+ Dict containing:
729
+ - current_page: The current page number
730
+ - per_page: Number of results per page
731
+ - total_entries: Total number of results available
732
+ - total_pages: Total number of pages available
733
+ - has_next_page: Boolean indicating if more pages exist
734
+ - next_page: Next page number (None if no more pages)
735
+ - results: List of lead dictionaries for this page
736
+ """
737
+ logger.info("Entering search_leads_with_apollo_page")
738
+
739
+ if example_url:
740
+ parsed_url = urlparse(example_url)
741
+ query_string = parsed_url.query
742
+
743
+ if not query_string and "?" in parsed_url.fragment:
744
+ fragment_query = parsed_url.fragment.split("?", 1)[1]
745
+ query_string = fragment_query
746
+
747
+ query_params = parse_qs(query_string)
748
+
749
+ dynamic_payload: Dict[str, Any] = {
750
+ "page": page,
751
+ "per_page": per_page,
752
+ }
753
+
754
+ mapping = {
755
+ "personLocations": "person_locations",
756
+ "organizationNumEmployeesRanges": "organization_num_employees_ranges",
757
+ "personTitles": "person_titles",
758
+ "personNotTitles": "person_not_titles",
759
+ "qOrganizationJobTitles": "q_organization_job_titles",
760
+ "sortAscending": "sort_ascending",
761
+ "sortByField": "sort_by_field",
762
+ "contactEmailStatusV2": "contact_email_status",
763
+ "searchSignalIds": "search_signal_ids",
764
+ "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
765
+ "revenueRange[max]": "revenue_range_max",
766
+ "revenueRange[min]": "revenue_range_min",
767
+ "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
768
+ "organizationIndustryTagIds": "organization_industry_tag_ids",
769
+ "notOrganizationIds": "not_organization_ids",
770
+ }
771
+
772
+ for raw_key, raw_value_list in query_params.items():
773
+ if raw_key.endswith("[]"):
774
+ key = raw_key[:-2]
775
+ else:
776
+ key = raw_key
777
+
778
+ if raw_key in mapping:
779
+ key = mapping[raw_key]
780
+ elif key in mapping:
781
+ key = mapping[key]
782
+ else:
783
+ key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()
784
+
785
+ if len(raw_value_list) == 1:
786
+ final_value: Union[str, List[str]] = raw_value_list[0]
787
+ else:
788
+ final_value = raw_value_list
789
+
790
+ if key in ("sort_ascending",):
791
+ val_lower = str(final_value).lower()
792
+ final_value = val_lower in ("true", "1", "yes")
793
+
794
+ if key in ("page", "per_page"):
795
+ try:
796
+ final_value = int(final_value)
797
+ except ValueError:
798
+ pass
799
+
800
+ if key == "q_keywords" and isinstance(final_value, list):
801
+ final_value = " ".join(final_value)
802
+
803
+ if raw_key.endswith("[]"):
804
+ if isinstance(final_value, str):
805
+ final_value = [final_value]
806
+ else:
807
+ if key in (
808
+ "person_locations",
809
+ "person_titles",
810
+ "person_seniorities",
811
+ "organization_locations",
812
+ "q_organization_domains",
813
+ "contact_email_status",
814
+ "organization_ids",
815
+ "organization_num_employees_ranges",
816
+ "person_not_titles",
817
+ "q_organization_job_titles",
818
+ "organization_latest_funding_stage_cd",
819
+ ):
820
+ if isinstance(final_value, str):
821
+ final_value = [final_value]
822
+
823
+ dynamic_payload[key] = final_value
824
+
825
+ if dynamic_payload.get("sort_by_field") == "[none]":
826
+ dynamic_payload.pop("sort_by_field")
827
+
828
+ # -----------------------------------
829
+ # B) No example_url -> build from `query`
830
+ # -----------------------------------
831
+ else:
832
+ dynamic_payload = {
833
+ "person_titles": query.person_current_titles or [],
834
+ "person_locations": query.person_locations or [],
835
+ "search_signal_ids": query.filter_by_signals or [],
836
+ "q_keywords": query.search_keywords or "",
837
+ "organization_num_employees_ranges": (
838
+ query.organization_num_employees_ranges
839
+ or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
840
+ ),
841
+ }
842
+ if query.job_openings_with_titles:
843
+ dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
844
+ if query.latest_funding_stages:
845
+ dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
846
+ if query.sort_by_field is not None:
847
+ dynamic_payload["sort_by_field"] = query.sort_by_field
848
+ if query.sort_ascending is not None:
849
+ dynamic_payload["sort_ascending"] = query.sort_ascending
850
+
851
+ page_payload = dict(dynamic_payload)
852
+ page_payload["page"] = page
853
+ page_payload["per_page"] = per_page
854
+
855
+ print(f"Fetching Apollo page {page} with per_page {per_page}..."
856
+ f" Payload: {json.dumps(page_payload, indent=2)}")
857
+
858
+ # Get the full Apollo API response with pagination metadata
859
+ token, is_oauth = get_apollo_access_token(tool_config)
860
+ headers = {
861
+ "Cache-Control": "no-cache",
862
+ "Content-Type": "application/json",
863
+ }
864
+ if is_oauth:
865
+ headers["Authorization"] = f"Bearer {token}"
866
+ else:
867
+ headers["X-Api-Key"] = token
868
+
869
+ url = "https://api.apollo.io/api/v1/mixed_people/search"
870
+
871
+ async with aiohttp.ClientSession() as session:
872
+ apollo_response = await fetch_apollo_data(session, url, headers, page_payload)
873
+ if not apollo_response:
874
+ return {"current_page": page, "per_page": per_page, "total_entries": 0, "total_pages": 0, "has_next_page": False, "results": []}
875
+
876
+ # Extract pagination metadata
877
+ pagination = apollo_response.get("pagination", {})
878
+ current_page = pagination.get("page", page)
879
+ total_entries = pagination.get("total_entries", 0)
880
+ total_pages = pagination.get("total_pages", 0)
881
+ per_page_actual = pagination.get("per_page", per_page)
882
+
883
+ # Determine if there are more pages
884
+ has_next_page = current_page < total_pages
885
+
886
+ # Extract people and contacts
887
+ people = apollo_response.get("people", [])
888
+ contacts = apollo_response.get("contacts", [])
889
+ page_results = people + contacts
890
+
891
+ leads: List[Dict[str, Any]] = []
892
+ for person_data in page_results:
893
+ input_user_properties: Dict[str, Any] = {}
894
+ additional_props = input_user_properties.get("additional_properties") or {}
895
+ input_user_properties = fill_in_properties_with_preference(input_user_properties, person_data)
896
+ person_data = cleanup_properties(person_data)
897
+ additional_props["apollo_person_data"] = json.dumps(person_data)
898
+ input_user_properties["additional_properties"] = additional_props
899
+ leads.append(input_user_properties)
900
+
901
+ logger.info(f"Converted {len(leads)} Apollo records into dictionaries (single page mode). Page {current_page} of {total_pages}")
902
+
903
+ return {
904
+ "current_page": current_page,
905
+ "per_page": per_page_actual,
906
+ "total_entries": total_entries,
907
+ "total_pages": total_pages,
908
+ "has_next_page": has_next_page,
909
+ "next_page": current_page + 1 if has_next_page else None,
910
+ "results": leads
911
+ }
912
+
675
913
  @assistant_tool
676
914
  async def get_organization_domain_from_apollo(
677
915
  organization_id: str,
@@ -731,22 +969,20 @@ async def get_organization_details_from_apollo(
731
969
  """
732
970
  logger.info("Entering get_organization_details_from_apollo")
733
971
 
734
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
972
+ token, is_oauth = get_apollo_access_token(tool_config)
735
973
  if not organization_id:
736
974
  logger.warning("No organization_id provided.")
737
975
  return {'error': "Organization ID must be provided"}
738
976
 
739
977
  headers = {
740
- "X-Api-Key": APOLLO_API_KEY,
741
978
  "Content-Type": "application/json",
742
979
  "Cache-Control": "no-cache",
743
980
  "Accept": "application/json"
744
981
  }
745
-
746
- cached_response = retrieve_output("get_organization_details_from_apollo", organization_id)
747
- if cached_response is not None:
748
- logger.info(f"Cache hit for organization ID: {organization_id}")
749
- return cached_response
982
+ if is_oauth:
983
+ headers["Authorization"] = f"Bearer {token}"
984
+ else:
985
+ headers["X-Api-Key"] = token
750
986
 
751
987
  url = f'https://api.apollo.io/api/v1/organizations/{organization_id}'
752
988
  logger.debug(f"Making GET request to Apollo for organization ID: {organization_id}")
@@ -759,7 +995,6 @@ async def get_organization_details_from_apollo(
759
995
  result = await response.json()
760
996
  org_details = result.get('organization', {})
761
997
  if org_details:
762
- cache_output("get_organization_details_from_apollo", organization_id, org_details)
763
998
  logger.info("Successfully retrieved organization details from Apollo.")
764
999
  return org_details
765
1000
  else:
@@ -826,7 +1061,7 @@ async def enrich_user_info_with_apollo(
826
1061
  email=email,
827
1062
  tool_config=tool_config
828
1063
  )
829
- except Exception as e:
1064
+ except Exception:
830
1065
  logger.exception("Exception occurred while enriching person info from Apollo by LinkedIn or email.")
831
1066
  else:
832
1067
  # Fallback to name-based lookup
@@ -874,11 +1109,11 @@ async def enrich_user_info_with_apollo(
874
1109
  linkedin_url=linkedin_url,
875
1110
  tool_config=tool_config
876
1111
  )
877
- except Exception as e:
1112
+ except Exception:
878
1113
  logger.exception("Exception occurred during second stage Apollo enrichment.")
879
1114
  if user_data_from_apollo:
880
1115
  break
881
- except Exception as e:
1116
+ except Exception:
882
1117
  logger.exception("Exception occurred while performing name-based lookup in Apollo.")
883
1118
 
884
1119
  if not user_data_from_apollo:
@@ -928,11 +1163,20 @@ async def enrich_user_info_with_apollo(
928
1163
  if not input_user_properties.get("summary_about_lead"):
929
1164
  input_user_properties["summary_about_lead"] = person_data["headline"]
930
1165
 
931
- # Derive location
932
- city = person_data.get("city", "")
933
- state = person_data.get("state", "")
934
- if city or state:
935
- input_user_properties["lead_location"] = f"{city}, {state}".strip(", ")
1166
+ # Derive location (avoid literal "None")
1167
+ city = person_data.get("city")
1168
+ state = person_data.get("state")
1169
+ parts = []
1170
+ for value in (city, state):
1171
+ if value is None:
1172
+ continue
1173
+ s = str(value).strip()
1174
+ if not s or s.lower() == "none":
1175
+ continue
1176
+ parts.append(s)
1177
+ lead_location = ", ".join(parts)
1178
+ if lead_location:
1179
+ input_user_properties["lead_location"] = lead_location
936
1180
 
937
1181
  # Verify name match
938
1182
  first_matched = bool(
@@ -952,3 +1196,389 @@ async def enrich_user_info_with_apollo(
952
1196
  input_user_properties["additional_properties"] = additional_props
953
1197
 
954
1198
  return input_user_properties
1199
+
1200
+
1201
async def search_companies_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Search for companies using Apollo's organizations/search endpoint.

    Issues a single request through ``fetch_apollo_data`` (no pagination
    loop) and returns the raw records from the response.

    Args:
        tool_config: Apollo API configuration used to resolve the access token.
        dynamic_payload: Search parameters for the API call. When it is empty
            or None, no request is made.

    Returns:
        The response's ``organizations`` entries followed by its ``accounts``
        entries. An empty list is returned when no payload was given or when
        no data came back.
    """
    logger.info("Entering search_companies_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    # OAuth tokens are sent as a bearer token; otherwise the token is sent
    # in the X-Api-Key header.
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"
    logger.info(f"Sending payload to Apollo organizations endpoint (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        data = await fetch_apollo_data(session, url, headers, dynamic_payload)

    if not data:
        logger.error("No data returned from Apollo organizations search.")
        return []

    # ``dict.get(key, [])`` only falls back when the key is missing; a key
    # that is present with a null value would yield None and break the
    # concatenation below, hence ``or []``.
    organizations = data.get("organizations") or []
    accounts = data.get("accounts") or []  # Apollo sometimes returns accounts as well
    return organizations + accounts
1243
+
1244
+
1245
def fill_in_company_properties(company_data: dict) -> dict:
    """
    Convert Apollo company/organization data into a standardized format.

    Scalar fields are copied as returned by ``company_data.get`` (so a field
    that is present with a null value stays ``None``); list fields are
    flattened into comma-separated strings.

    Args:
        company_data: Raw company data from Apollo API

    Returns:
        Dictionary with standardized company properties. The cleaned raw
        record is stored as a JSON string under
        ``additional_properties["apollo_organization_data"]``.
    """
    company_properties = {}

    # Basic company information
    company_properties["organization_name"] = company_data.get("name", "")
    company_properties["primary_domain"] = company_data.get("primary_domain", "")
    company_properties["website_url"] = company_data.get("website_url", "")
    company_properties["organization_linkedin_url"] = company_data.get("linkedin_url", "")

    # Location information
    city = company_data.get("city", "")
    state = company_data.get("state", "")
    country = company_data.get("country", "")
    company_properties["organization_city"] = city
    company_properties["organization_state"] = state
    company_properties["organization_country"] = country

    # Combined location string; empty/None parts are skipped so the result
    # never contains dangling separators.
    company_properties["organization_location"] = ", ".join(
        str(part) for part in (city, state, country) if part
    )

    # Company size and financial info
    company_properties["employee_count"] = company_data.get("estimated_num_employees", 0)
    company_properties["annual_revenue"] = company_data.get("annual_revenue", 0)

    # Industry and business info
    company_properties["industry"] = company_data.get("industry", "")
    # ``.get("keywords", [])`` would still return None for an explicit null,
    # and ``", ".join(None)`` raises TypeError -- fall back with ``or``.
    keywords = company_data.get("keywords") or []
    company_properties["keywords"] = ", ".join(str(keyword) for keyword in keywords)
    company_properties["description"] = company_data.get("description", "")

    # Funding and growth
    company_properties["founded_year"] = company_data.get("founded_year", "")
    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
    company_properties["total_funding"] = company_data.get("total_funding", 0)

    # Technology stack (key is only set when there is something to report)
    tech_stack = company_data.get("technology_names") or []
    if tech_stack:
        company_properties["technology_stack"] = ", ".join(str(name) for name in tech_stack)

    # Apollo-specific IDs
    company_properties["apollo_organization_id"] = company_data.get("id", "")

    # Additional metadata
    company_properties["phone"] = company_data.get("phone", "")
    company_properties["facebook_url"] = company_data.get("facebook_url", "")
    company_properties["twitter_url"] = company_data.get("twitter_url", "")

    # Store raw data for reference
    company_properties["additional_properties"] = {
        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
    }

    return company_properties
1309
+
1310
+
1311
@assistant_tool
async def search_companies_with_apollo_page(
    query: CompanyQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """
    Fetch a single page of Apollo companies using ``page`` and ``per_page``.

    This helper performs one request to the Apollo API and returns the fetched
    companies along with comprehensive pagination metadata.

    The request payload is built from ``example_url`` when it is given
    (its query string, or the query part of its ``#fragment``, is translated
    into snake_case payload keys); otherwise it is built from ``query``.
    In both modes the ``page`` and ``per_page`` arguments win over any value
    found elsewhere.

    Args:
        query: CompanyQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1)
        per_page: Number of results per page (defaults to 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
            - current_page: The current page number
            - per_page: Number of results per page
            - total_entries: Total number of results available
            - total_pages: Total number of pages available
            - has_next_page: Boolean indicating if more pages exist
            - next_page: Next page number (None if no more pages)
            - results: List of company dictionaries for this page
    """
    logger.info("Entering search_companies_with_apollo_page")

    # An explicit None would otherwise be sent to Apollo as null and later
    # break the ``current_page < total_pages`` comparison.
    if page is None:
        page = 1
    if per_page is None:
        per_page = 25

    # Payload keys whose value must always be sent as a list.
    list_valued_keys = (
        "organization_locations",
        "organization_industries",
        "organization_industry_tag_ids",
        "q_organization_domains",
        "q_organization_keyword_tags",
        "organization_ids",
        "not_organization_ids",
        "organization_num_employees_ranges",
        "currently_using_any_of_technology_uids",
        "organization_latest_funding_stage_cd",
    )

    # -----------------------------------
    # A) example_url -> translate its query parameters
    # -----------------------------------
    if example_url:
        parsed_url = urlparse(example_url)
        query_string = parsed_url.query

        # Some URLs carry the parameters after the fragment ("#/path?a=b").
        if not query_string and "?" in parsed_url.fragment:
            query_string = parsed_url.fragment.split("?", 1)[1]

        query_params = parse_qs(query_string)

        dynamic_payload: Dict[str, Any] = {
            "page": page,
            "per_page": per_page,
        }

        # Organization-specific URL parameter mapping
        mapping = {
            "organizationLocations": "organization_locations",
            "organizationNumEmployeesRanges": "organization_num_employees_ranges",
            "organizationIndustries": "organization_industries",
            "organizationIndustryTagIds": "organization_industry_tag_ids",
            "qKeywords": "q_keywords",
            "qOrganizationDomains": "q_organization_domains",
            "sortAscending": "sort_ascending",
            "sortByField": "sort_by_field",
            "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
            "revenueRange[max]": "revenue_range_max",
            "revenueRange[min]": "revenue_range_min",
            "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
            "organizationIds": "organization_ids",
            "notOrganizationIds": "not_organization_ids",
            "qOrganizationSearchListId": "q_organization_search_list_id",
            "qNotOrganizationSearchListId": "q_not_organization_search_list_id",
        }

        for raw_key, raw_value_list in query_params.items():
            is_array_param = raw_key.endswith("[]")
            key = raw_key[:-2] if is_array_param else raw_key

            # Prefer an explicit mapping (tried with and without the "[]"
            # suffix); otherwise fall back to camelCase -> snake_case.
            if raw_key in mapping:
                key = mapping[raw_key]
            elif key in mapping:
                key = mapping[key]
            else:
                key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()

            # parse_qs always yields lists; unwrap single values.
            if len(raw_value_list) == 1:
                final_value: Any = raw_value_list[0]
            else:
                final_value = raw_value_list

            if key == "sort_ascending":
                final_value = str(final_value).lower() in ("true", "1", "yes")

            if key in ("page", "per_page", "revenue_range_min", "revenue_range_max"):
                try:
                    final_value = int(final_value)
                except (TypeError, ValueError):
                    # Non-numeric text (ValueError) or a repeated parameter
                    # that parsed to a list (TypeError): keep the raw value.
                    pass

            if key == "q_organization_keyword_tags":
                # Handle both string and list inputs, splitting on commas.
                if isinstance(final_value, str):
                    final_value = [tag.strip() for tag in final_value.split(",") if tag.strip()]
                elif isinstance(final_value, list):
                    flattened = []
                    for item in final_value:
                        if isinstance(item, str) and "," in item:
                            flattened.extend(tag.strip() for tag in item.split(",") if tag.strip())
                        else:
                            flattened.append(item)
                    final_value = flattened

            # "name[]" parameters and known list-valued keys are always lists.
            if isinstance(final_value, str) and (is_array_param or key in list_valued_keys):
                final_value = [final_value]

            dynamic_payload[key] = final_value

    # -----------------------------------
    # B) No example_url -> build from `query`
    # -----------------------------------
    else:
        dynamic_payload = {}

        # Only add fields if they have values (Apollo doesn't like empty arrays)
        if query.organization_locations:
            dynamic_payload["organization_locations"] = query.organization_locations
        if query.organization_industries:
            dynamic_payload["organization_industries"] = query.organization_industries
        if query.organization_industry_tag_ids:
            dynamic_payload["organization_industry_tag_ids"] = query.organization_industry_tag_ids

        # Handle employee ranges: explicit ranges win, otherwise build one
        # from min/max with 1 and 1000 as the open-ended defaults.
        employee_ranges = []
        if query.organization_num_employees_ranges:
            employee_ranges = query.organization_num_employees_ranges
        elif query.min_employees or query.max_employees:
            employee_ranges = [f"{query.min_employees or 1},{query.max_employees or 1000}"]

        if employee_ranges:
            dynamic_payload["organization_num_employees_ranges"] = employee_ranges

        # Add optional parameters only if they have values
        if query.q_keywords:
            # Split comma-separated keywords into an array for company search
            if isinstance(query.q_keywords, str):
                keyword_tags = [tag.strip() for tag in query.q_keywords.split(",") if tag.strip()]
            else:
                keyword_tags = query.q_keywords
            dynamic_payload["q_organization_keyword_tags"] = keyword_tags
        if query.q_organization_domains:
            dynamic_payload["q_organization_domains"] = query.q_organization_domains
        if query.revenue_range_min is not None:
            dynamic_payload["revenue_range_min"] = query.revenue_range_min
        if query.revenue_range_max is not None:
            dynamic_payload["revenue_range_max"] = query.revenue_range_max
        if query.organization_latest_funding_stage_cd:
            dynamic_payload["organization_latest_funding_stage_cd"] = query.organization_latest_funding_stage_cd
        if query.currently_using_any_of_technology_uids:
            dynamic_payload["currently_using_any_of_technology_uids"] = query.currently_using_any_of_technology_uids
        if query.organization_ids:
            dynamic_payload["organization_ids"] = query.organization_ids
        if query.not_organization_ids:
            dynamic_payload["not_organization_ids"] = query.not_organization_ids
        if query.q_organization_search_list_id:
            dynamic_payload["q_organization_search_list_id"] = query.q_organization_search_list_id
        if query.q_not_organization_search_list_id:
            dynamic_payload["q_not_organization_search_list_id"] = query.q_not_organization_search_list_id
        # query.sort_by_field / query.sort_ascending are intentionally not
        # copied: sorting parameters are stripped from the payload below.

    # Remove sorting parameters that may not be supported by organizations endpoint
    # (this also covers the "[none]" sort placeholder that URLs can carry).
    # NOTE(review): applied to both the URL and the query mode -- confirm that
    # URL-supplied sort parameters are not meant to reach the endpoint.
    dynamic_payload.pop("sort_by_field", None)
    dynamic_payload.pop("sort_ascending", None)

    # Clean up the payload - remove empty arrays and None values that Apollo doesn't like
    cleaned_payload = {
        key: value
        for key, value in dynamic_payload.items()
        if value is not None and not (isinstance(value, list) and not value)
    }

    # Ensure page and per_page are always included; the function arguments
    # take precedence over any value parsed from the URL.
    cleaned_payload["page"] = page
    cleaned_payload["per_page"] = per_page

    logger.info(
        f"Fetching Apollo companies page {page} with per_page {per_page}..."
        f" Payload: {json.dumps(cleaned_payload, indent=2)}"
    )

    # Get the full Apollo API response with pagination metadata
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"

    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, cleaned_payload)

    if not apollo_response:
        # Same shape as the success result so callers can always read
        # ``next_page``.
        return {
            "current_page": page,
            "per_page": per_page,
            "total_entries": 0,
            "total_pages": 0,
            "has_next_page": False,
            "next_page": None,
            "results": []
        }

    # Extract pagination metadata; ``or`` also covers keys that are present
    # but null, which ``dict.get`` defaults do not.
    pagination = apollo_response.get("pagination") or {}
    current_page = pagination.get("page") or page
    total_entries = pagination.get("total_entries") or 0
    total_pages = pagination.get("total_pages") or 0
    per_page_actual = pagination.get("per_page") or per_page

    # Determine if there are more pages
    has_next_page = current_page < total_pages

    # Extract organizations and accounts
    organizations = apollo_response.get("organizations") or []
    accounts = apollo_response.get("accounts") or []
    page_results = organizations + accounts

    companies: List[Dict[str, Any]] = [
        fill_in_company_properties(company_data) for company_data in page_results
    ]

    logger.info(f"Converted {len(companies)} Apollo company records into standardized dictionaries (single page mode). Page {current_page} of {total_pages}")

    return {
        "current_page": current_page,
        "per_page": per_page_actual,
        "total_entries": total_entries,
        "total_pages": total_pages,
        "has_next_page": has_next_page,
        "next_page": current_page + 1 if has_next_page else None,
        "results": companies
    }