dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. dhisana/schemas/common.py +10 -1
  2. dhisana/schemas/sales.py +203 -22
  3. dhisana/utils/add_mapping.py +0 -2
  4. dhisana/utils/apollo_tools.py +739 -119
  5. dhisana/utils/built_with_api_tools.py +4 -2
  6. dhisana/utils/check_email_validity_tools.py +35 -18
  7. dhisana/utils/check_for_intent_signal.py +1 -2
  8. dhisana/utils/check_linkedin_url_validity.py +34 -8
  9. dhisana/utils/clay_tools.py +3 -2
  10. dhisana/utils/clean_properties.py +1 -4
  11. dhisana/utils/compose_salesnav_query.py +0 -1
  12. dhisana/utils/compose_search_query.py +7 -3
  13. dhisana/utils/composite_tools.py +0 -1
  14. dhisana/utils/dataframe_tools.py +2 -2
  15. dhisana/utils/email_body_utils.py +72 -0
  16. dhisana/utils/email_provider.py +174 -35
  17. dhisana/utils/enrich_lead_information.py +183 -53
  18. dhisana/utils/fetch_openai_config.py +129 -0
  19. dhisana/utils/field_validators.py +1 -1
  20. dhisana/utils/g2_tools.py +0 -1
  21. dhisana/utils/generate_content.py +0 -1
  22. dhisana/utils/generate_email.py +68 -23
  23. dhisana/utils/generate_email_response.py +294 -46
  24. dhisana/utils/generate_flow.py +0 -1
  25. dhisana/utils/generate_linkedin_connect_message.py +9 -2
  26. dhisana/utils/generate_linkedin_response_message.py +137 -66
  27. dhisana/utils/generate_structured_output_internal.py +317 -164
  28. dhisana/utils/google_custom_search.py +150 -44
  29. dhisana/utils/google_oauth_tools.py +721 -0
  30. dhisana/utils/google_workspace_tools.py +278 -54
  31. dhisana/utils/hubspot_clearbit.py +3 -1
  32. dhisana/utils/hubspot_crm_tools.py +718 -272
  33. dhisana/utils/instantly_tools.py +3 -1
  34. dhisana/utils/lusha_tools.py +10 -7
  35. dhisana/utils/mailgun_tools.py +150 -0
  36. dhisana/utils/microsoft365_tools.py +447 -0
  37. dhisana/utils/openai_assistant_and_file_utils.py +121 -177
  38. dhisana/utils/openai_helpers.py +8 -6
  39. dhisana/utils/parse_linkedin_messages_txt.py +1 -3
  40. dhisana/utils/profile.py +37 -0
  41. dhisana/utils/proxy_curl_tools.py +377 -76
  42. dhisana/utils/proxycurl_search_leads.py +426 -0
  43. dhisana/utils/research_lead.py +3 -3
  44. dhisana/utils/sales_navigator_crawler.py +1 -6
  45. dhisana/utils/salesforce_crm_tools.py +323 -50
  46. dhisana/utils/search_router.py +131 -0
  47. dhisana/utils/search_router_jobs.py +51 -0
  48. dhisana/utils/sendgrid_tools.py +126 -91
  49. dhisana/utils/serarch_router_local_business.py +75 -0
  50. dhisana/utils/serpapi_additional_tools.py +290 -0
  51. dhisana/utils/serpapi_google_jobs.py +117 -0
  52. dhisana/utils/serpapi_google_search.py +188 -0
  53. dhisana/utils/serpapi_local_business_search.py +129 -0
  54. dhisana/utils/serpapi_search_tools.py +360 -432
  55. dhisana/utils/serperdev_google_jobs.py +125 -0
  56. dhisana/utils/serperdev_local_business.py +154 -0
  57. dhisana/utils/serperdev_search.py +233 -0
  58. dhisana/utils/smtp_email_tools.py +178 -18
  59. dhisana/utils/test_connect.py +1603 -130
  60. dhisana/utils/trasform_json.py +3 -3
  61. dhisana/utils/web_download_parse_tools.py +0 -1
  62. dhisana/utils/zoominfo_tools.py +2 -3
  63. dhisana/workflow/test.py +1 -1
  64. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
  65. dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
  66. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
  67. dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
  68. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
  69. {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,15 @@
1
1
  import asyncio
2
- import hashlib
3
2
  import json
4
3
  import logging
5
4
  import os
6
5
  import re
7
6
  import aiohttp
8
7
  import backoff
9
- from datetime import datetime, timedelta
10
8
 
11
- from pydantic import BaseModel
12
- from dhisana.schemas.sales import LeadsQueryFilters, SmartList, SmartListLead
13
- from dhisana.utils.cache_output_tools import cache_output, retrieve_output
9
+ from dhisana.schemas.sales import LeadsQueryFilters, CompanyQueryFilters
14
10
  from dhisana.utils.assistant_tool_tag import assistant_tool
15
11
  from urllib.parse import urlparse, parse_qs
16
- from typing import Any, Dict, List, Optional, Union
12
+ from typing import Any, Dict, List, Optional, Tuple, Union
17
13
 
18
14
  from dhisana.utils.clean_properties import cleanup_properties
19
15
 
@@ -21,48 +17,81 @@ logging.basicConfig(level=logging.INFO)
21
17
  logger = logging.getLogger(__name__)
22
18
 
23
19
 
24
- def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> str:
20
+ def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> Tuple[str, bool]:
25
21
  """
26
- Retrieves the APOLLO_API_KEY access token from the provided tool configuration.
22
+ Retrieves an Apollo access token from tool configuration or environment variables.
27
23
 
28
24
  Args:
29
- tool_config (list): A list of dictionaries containing the tool configuration.
30
- Each dictionary should have a "name" key and a "configuration" key,
31
- where "configuration" is a list of dictionaries containing "name" and "value" keys.
25
+ tool_config (list): Optional tool configuration payload provided to the tool.
32
26
 
33
27
  Returns:
34
- str: The APOLLO_API_KEY access token.
28
+ Tuple[str, bool]: A tuple containing the token string and a boolean flag indicating
29
+ whether the token represents an OAuth bearer token (``True``) or an API key (``False``).
35
30
 
36
31
  Raises:
37
- ValueError: If the access token is not found in the tool configuration or environment variable.
32
+ ValueError: If the Apollo integration has not been configured.
38
33
  """
39
- APOLLO_API_KEY = None
34
+ token: Optional[str] = None
35
+ is_oauth = False
40
36
 
41
37
  if tool_config:
42
- logger.debug(f"Tool config provided: {tool_config}")
43
38
  apollo_config = next(
44
39
  (item for item in tool_config if item.get("name") == "apollo"), None
45
40
  )
46
41
  if apollo_config:
47
42
  config_map = {
48
- item["name"]: item["value"]
43
+ item["name"]: item.get("value")
49
44
  for item in apollo_config.get("configuration", [])
50
45
  if item
51
46
  }
52
- APOLLO_API_KEY = config_map.get("apiKey")
47
+
48
+ raw_oauth = config_map.get("oauth_tokens")
49
+ if isinstance(raw_oauth, str):
50
+ try:
51
+ raw_oauth = json.loads(raw_oauth)
52
+ except Exception:
53
+ raw_oauth = None
54
+ if isinstance(raw_oauth, dict):
55
+ token = (
56
+ raw_oauth.get("access_token")
57
+ or raw_oauth.get("token")
58
+ )
59
+ if token:
60
+ is_oauth = True
61
+
62
+ if not token:
63
+ direct_access_token = config_map.get("access_token")
64
+ if direct_access_token:
65
+ token = direct_access_token
66
+ is_oauth = True
67
+
68
+ if not token:
69
+ api_key = config_map.get("apiKey") or config_map.get("api_key")
70
+ if api_key:
71
+ token = api_key
72
+ is_oauth = False
53
73
  else:
54
74
  logger.warning("No 'apollo' config item found in tool_config.")
55
- else:
56
- logger.debug("No tool_config provided or it's None.")
57
75
 
58
- # Check environment variable if no key found yet
59
- APOLLO_API_KEY = APOLLO_API_KEY or os.getenv("APOLLO_API_KEY")
60
-
61
- if not APOLLO_API_KEY:
62
- logger.error("APOLLO_API_KEY not found in configuration or environment.")
63
- raise ValueError("APOLLO_API_KEY access token not found in tool_config or environment variable")
76
+ if not token:
77
+ env_oauth_token = os.getenv("APOLLO_ACCESS_TOKEN")
78
+ if env_oauth_token:
79
+ token = env_oauth_token
80
+ is_oauth = True
81
+
82
+ if not token:
83
+ env_api_key = os.getenv("APOLLO_API_KEY")
84
+ if env_api_key:
85
+ token = env_api_key
86
+ is_oauth = False
87
+
88
+ if not token:
89
+ logger.error("Apollo integration is not configured.")
90
+ raise ValueError(
91
+ "Apollo integration is not configured. Please configure the connection to Apollo in Integrations."
92
+ )
64
93
 
65
- return APOLLO_API_KEY
94
+ return token, is_oauth
66
95
 
67
96
 
68
97
  @assistant_tool
@@ -77,6 +106,7 @@ async def enrich_person_info_from_apollo(
77
106
  linkedin_url: Optional[str] = None,
78
107
  email: Optional[str] = None,
79
108
  phone: Optional[str] = None,
109
+ fetch_valid_phone_number: Optional[bool] = False,
80
110
  tool_config: Optional[List[Dict]] = None,
81
111
  ) -> Dict[str, Any]:
82
112
  """
@@ -86,37 +116,40 @@ async def enrich_person_info_from_apollo(
86
116
  - **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
87
117
  - **email** (*str*, optional): Email address of the person.
88
118
  - **phone** (*str*, optional): Phone number of the person.
119
+ - **fetch_valid_phone_number** (*bool*, optional): If True, include phone numbers in the API response. Defaults to False.
89
120
 
90
121
  Returns:
91
122
  - **dict**: JSON response containing person information.
92
123
  """
93
124
  logger.info("Entering enrich_person_info_from_apollo")
94
125
 
95
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
126
+ token, is_oauth = get_apollo_access_token(tool_config)
96
127
 
97
128
  if not linkedin_url and not email and not phone:
98
129
  logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
99
130
  return {'error': "At least one of linkedin_url, email, or phone must be provided"}
100
131
 
101
- headers = {
102
- "X-Api-Key": f"{APOLLO_API_KEY}",
103
- "Content-Type": "application/json"
104
- }
132
+ headers = {"Content-Type": "application/json"}
133
+ if is_oauth:
134
+ headers["Authorization"] = f"Bearer {token}"
135
+ else:
136
+ headers["X-Api-Key"] = token
105
137
 
106
138
  data = {}
107
139
  if linkedin_url:
108
140
  logger.debug(f"LinkedIn URL provided: {linkedin_url}")
109
141
  data['linkedin_url'] = linkedin_url
110
- cached_response = retrieve_output("enrich_person_info_from_apollo", linkedin_url)
111
- if cached_response is not None:
112
- logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
113
- return cached_response
114
142
  if email:
115
143
  logger.debug(f"Email provided: {email}")
116
144
  data['email'] = email
117
145
  if phone:
118
146
  logger.debug(f"Phone provided: {phone}")
119
147
  data['phone_numbers'] = [phone] # Apollo expects a list for phone numbers
148
+
149
+ # Add reveal_phone_number parameter if fetch_valid_phone_number is True
150
+ if fetch_valid_phone_number:
151
+ logger.debug("fetch_valid_phone_number flag is True, including phone numbers in API response")
152
+ data['reveal_phone_number'] = True
120
153
 
121
154
  url = 'https://api.apollo.io/api/v1/people/match'
122
155
 
@@ -126,8 +159,6 @@ async def enrich_person_info_from_apollo(
126
159
  logger.debug(f"Received response status: {response.status}")
127
160
  if response.status == 200:
128
161
  result = await response.json()
129
- if linkedin_url:
130
- cache_output("enrich_person_info_from_apollo", linkedin_url, result)
131
162
  logger.info("Successfully retrieved person info from Apollo.")
132
163
  return result
133
164
  elif response.status == 429:
@@ -179,11 +210,12 @@ async def lookup_person_in_apollo_by_name(
179
210
  logger.warning("No full_name provided.")
180
211
  return {'error': "Full name is required"}
181
212
 
182
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
183
- headers = {
184
- "X-Api-Key": f"{APOLLO_API_KEY}",
185
- "Content-Type": "application/json"
186
- }
213
+ token, is_oauth = get_apollo_access_token(tool_config)
214
+ headers = {"Content-Type": "application/json"}
215
+ if is_oauth:
216
+ headers["Authorization"] = f"Bearer {token}"
217
+ else:
218
+ headers["X-Api-Key"] = token
187
219
 
188
220
  # Construct the query payload
189
221
  data = {
@@ -192,16 +224,6 @@ async def lookup_person_in_apollo_by_name(
192
224
  "per_page": 10
193
225
  }
194
226
 
195
- # Build a cache key that includes full_name and company_name (if provided)
196
- # so that results are correctly cached and retrieved.
197
- key_item = f"lookup_person_in_apollo_by_name_{full_name}_{company_name or ''}".lower()
198
-
199
- # Attempt to retrieve a cached response first
200
- cached_response = retrieve_output("lookup_person_in_apollo_by_name", key_item)
201
- if cached_response is not None:
202
- logger.info(f"Cache hit for user: {full_name}, company: {company_name or ''}")
203
- return cached_response
204
-
205
227
  url = 'https://api.apollo.io/api/v1/mixed_people/search'
206
228
  logger.debug(f"Making request to Apollo with payload: {data}")
207
229
 
@@ -212,7 +234,6 @@ async def lookup_person_in_apollo_by_name(
212
234
  if response.status == 200:
213
235
  result = await response.json()
214
236
  logger.info("Successfully looked up person by name on Apollo.")
215
- cache_output("lookup_person_in_apollo_by_name", key_item, result)
216
237
  return result
217
238
  elif response.status == 429:
218
239
  msg = "Rate limit exceeded"
@@ -256,23 +277,21 @@ async def enrich_organization_info_from_apollo(
256
277
  """
257
278
  logger.info("Entering enrich_organization_info_from_apollo")
258
279
 
259
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
280
+ token, is_oauth = get_apollo_access_token(tool_config)
260
281
 
261
282
  if not organization_domain:
262
283
  logger.warning("No organization domain provided.")
263
284
  return {'error': "organization domain must be provided"}
264
285
 
265
286
  headers = {
266
- "X-Api-Key": f"{APOLLO_API_KEY}",
267
287
  "Content-Type": "application/json",
268
288
  "Cache-Control": "no-cache",
269
289
  "accept": "application/json"
270
290
  }
271
-
272
- cached_response = retrieve_output("enrich_organization_info_from_apollo", organization_domain)
273
- if cached_response is not None:
274
- logger.info(f"Cache hit for organization domain: {organization_domain}")
275
- return cached_response
291
+ if is_oauth:
292
+ headers["Authorization"] = f"Bearer {token}"
293
+ else:
294
+ headers["X-Api-Key"] = token
276
295
 
277
296
  url = f'https://api.apollo.io/api/v1/organizations/enrich?domain={organization_domain}'
278
297
  logger.debug(f"Making GET request to Apollo for organization domain: {organization_domain}")
@@ -283,7 +302,6 @@ async def enrich_organization_info_from_apollo(
283
302
  logger.debug(f"Received response status: {response.status}")
284
303
  if response.status == 200:
285
304
  result = await response.json()
286
- cache_output("enrich_organization_info_from_apollo", organization_domain, result)
287
305
  logger.info("Successfully retrieved organization info from Apollo.")
288
306
  return result
289
307
  elif response.status == 429:
@@ -315,22 +333,12 @@ async def enrich_organization_info_from_apollo(
315
333
  )
316
334
  async def fetch_apollo_data(session, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
317
335
  logger.info("Entering fetch_apollo_data")
318
- key_data = f"{url}_{json.dumps(payload, sort_keys=True)}"
319
- key_hash = hashlib.sha256(key_data.encode()).hexdigest()
320
- logger.debug(f"Cache key hash: {key_hash}")
321
-
322
- cached_response = retrieve_output("fetch_apollo_data", key_hash)
323
- if cached_response is not None:
324
- logger.info("Cache hit for fetch_apollo_data.")
325
- return cached_response
326
-
327
- logger.debug("No cache hit. Making POST request to Apollo.")
336
+ logger.debug("Making POST request to Apollo.")
328
337
  async with session.post(url, headers=headers, json=payload) as response:
329
338
  logger.debug(f"Received response status: {response.status}")
330
339
  if response.status == 200:
331
340
  result = await response.json()
332
- cache_output("fetch_apollo_data", key_hash, result)
333
- logger.info("Successfully fetched data from Apollo and cached it.")
341
+ logger.info("Successfully fetched data from Apollo.")
334
342
  return result
335
343
  elif response.status == 429:
336
344
  msg = "Rate limit exceeded"
@@ -357,12 +365,15 @@ async def search_people_with_apollo(
357
365
  logger.warning("No payload given; returning empty result.")
358
366
  return []
359
367
 
360
- api_key = get_apollo_access_token(tool_config)
368
+ token, is_oauth = get_apollo_access_token(tool_config)
361
369
  headers = {
362
370
  "Cache-Control": "no-cache",
363
371
  "Content-Type": "application/json",
364
- "X-Api-Key": api_key,
365
372
  }
373
+ if is_oauth:
374
+ headers["Authorization"] = f"Bearer {token}"
375
+ else:
376
+ headers["X-Api-Key"] = token
366
377
 
367
378
  url = "https://api.apollo.io/api/v1/mixed_people/search"
368
379
  logger.info(f"Sending payload to Apollo (single page): {json.dumps(dynamic_payload, indent=2)}")
@@ -388,16 +399,6 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
388
399
  """Returns True if the value is None, empty string, or only whitespace."""
389
400
  return value is None or (isinstance(value, str) and not value.strip())
390
401
 
391
- # Email
392
- if is_empty(input_user_properties.get("email")):
393
- input_user_properties["email"] = person_data.get("email", "")
394
-
395
- # Phone
396
- if is_empty(input_user_properties.get("phone")):
397
- # person_data["contact"] might not be defined, so we chain get calls
398
- input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
399
- .get("sanitized_phone", ""))
400
-
401
402
  # Full name
402
403
  # Because `person_data.get("name")` has precedence over input_user_properties,
403
404
  # we only update it if input_user_properties is empty/None for "full_name".
@@ -412,6 +413,16 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
412
413
  if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
413
414
  input_user_properties["last_name"] = person_data["last_name"]
414
415
 
416
+ # Email
417
+ if is_empty(input_user_properties.get("email")):
418
+ input_user_properties["email"] = person_data.get("email", "")
419
+
420
+ # Phone
421
+ if is_empty(input_user_properties.get("phone")):
422
+ # person_data["contact"] might not be defined, so we chain get calls
423
+ input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
424
+ .get("sanitized_phone", ""))
425
+
415
426
  # LinkedIn URL
416
427
  if is_empty(input_user_properties.get("user_linkedin_url")) and person_data.get("linkedin_url"):
417
428
  input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]
@@ -451,11 +462,19 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
451
462
  if is_empty(input_user_properties.get("summary_about_lead")) and person_data.get("headline"):
452
463
  input_user_properties["summary_about_lead"] = person_data["headline"]
453
464
 
454
- # City/State -> lead_location
455
- city = person_data.get("city", "")
456
- state = person_data.get("state", "")
457
- if is_empty(input_user_properties.get("lead_location")) and (city or state):
458
- lead_location = f"{city}, {state}".strip(", ")
465
+ # City/State -> lead_location (avoid literal "None")
466
+ city = person_data.get("city")
467
+ state = person_data.get("state")
468
+ parts = []
469
+ for value in (city, state):
470
+ if value is None:
471
+ continue
472
+ s = str(value).strip()
473
+ if not s or s.lower() == "none":
474
+ continue
475
+ parts.append(s)
476
+ lead_location = ", ".join(parts) if parts else None
477
+ if is_empty(input_user_properties.get("lead_location")) and lead_location:
459
478
  input_user_properties["lead_location"] = lead_location
460
479
 
461
480
  # Filter out placeholder emails
@@ -467,13 +486,13 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
467
486
 
468
487
  async def search_leads_with_apollo(
469
488
  query: LeadsQueryFilters,
470
- request: SmartList,
489
+ max_items_to_search: Optional[int] = 10,
471
490
  example_url: Optional[str] = None,
472
491
  tool_config: Optional[List[Dict[str, Any]]] = None,
473
- ) -> List[SmartListLead]:
492
+ ) -> List[Dict]:
474
493
  logger.info("Entering search_leads_with_apollo")
475
494
 
476
- max_items = request.max_items_to_search or 10
495
+ max_items = max_items_to_search or 10
477
496
  if max_items > 2500:
478
497
  logger.warning("Requested max_items_to_search > 2000, overriding to 2000.")
479
498
  max_items = 2500
@@ -519,7 +538,7 @@ async def search_leads_with_apollo(
519
538
  # Important: handle personNotTitles as well
520
539
  "personNotTitles": "person_not_titles",
521
540
 
522
- "qOrganizationJobTitles": "q_keywords",
541
+ "qOrganizationJobTitles": "q_organization_job_titles",
523
542
  "sortAscending": "sort_ascending",
524
543
  "sortByField": "sort_by_field",
525
544
  "contactEmailStatusV2": "contact_email_status",
@@ -592,6 +611,8 @@ async def search_leads_with_apollo(
592
611
  "organization_ids",
593
612
  "organization_num_employees_ranges",
594
613
  "person_not_titles", # <--- added so single item is forced into list
614
+ "q_organization_job_titles",
615
+ "organization_latest_funding_stage_cd",
595
616
  ):
596
617
  if isinstance(final_value, str):
597
618
  final_value = [final_value]
@@ -612,7 +633,8 @@ async def search_leads_with_apollo(
612
633
  dynamic_payload = {
613
634
  "person_titles": query.person_current_titles or [],
614
635
  "person_locations": query.person_locations or [],
615
- "search_signal_ids": query.search_signal_ids or query.filter_by_signals or [],
636
+ "search_signal_ids": query.filter_by_signals or [],
637
+ "q_keywords": query.search_keywords or "",
616
638
  "organization_num_employees_ranges": (
617
639
  query.organization_num_employees_ranges
618
640
  or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
@@ -620,6 +642,10 @@ async def search_leads_with_apollo(
620
642
  "page": 1,
621
643
  "per_page": min(max_items, 100),
622
644
  }
645
+ if query.job_openings_with_titles:
646
+ dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
647
+ if query.latest_funding_stages:
648
+ dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
623
649
  if query.sort_by_field is not None:
624
650
  dynamic_payload["sort_by_field"] = query.sort_by_field
625
651
  if query.sort_ascending is not None:
@@ -657,9 +683,9 @@ async def search_leads_with_apollo(
657
683
  logger.info(f"Fetched a total of {len(all_people)} items from Apollo (across pages).")
658
684
 
659
685
  # -----------------------------------------------
660
- # Convert raw results -> SmartListLead objects
686
+ # Convert raw results -> dictionary objects
661
687
  # -----------------------------------------------
662
- leads: List[SmartListLead] = []
688
+ leads: List[Dict[str, Any]] = []
663
689
  for user_data_from_apollo in all_people:
664
690
  person_data = user_data_from_apollo
665
691
 
@@ -673,15 +699,217 @@ async def search_leads_with_apollo(
673
699
  additional_props["apollo_person_data"] = json.dumps(person_data)
674
700
  input_user_properties["additional_properties"] = additional_props
675
701
 
676
- lead = SmartListLead(**input_user_properties)
677
- lead.agent_instance_id = request.agent_instance_id
678
- lead.smart_list_id = request.id
679
- lead.organization_id = request.organization_id
680
- leads.append(lead)
702
+ leads.append(input_user_properties)
681
703
 
682
- logger.info(f"Converted {len(leads)} Apollo records into SmartListLead objects.")
704
+ logger.info(f"Converted {len(leads)} Apollo records into dictionaries.")
683
705
  return leads
684
706
 
707
+
708
+ async def search_leads_with_apollo_page(
709
+ query: LeadsQueryFilters,
710
+ page: Optional[int] = 1,
711
+ per_page: Optional[int] = 25,
712
+ example_url: Optional[str] = None,
713
+ tool_config: Optional[List[Dict[str, Any]]] = None,
714
+ ) -> Dict[str, Any]:
715
+ """Fetch a single page of Apollo leads using ``page`` and ``per_page``.
716
+
717
+ This helper performs one request to the Apollo API and returns the fetched
718
+ leads along with comprehensive pagination metadata.
719
+
720
+ Args:
721
+ query: LeadsQueryFilters object containing search criteria
722
+ page: Page number to fetch (1-indexed, defaults to 1)
723
+ per_page: Number of results per page (defaults to 25)
724
+ example_url: Optional URL to parse search parameters from
725
+ tool_config: Optional tool configuration for API keys
726
+
727
+ Returns:
728
+ Dict containing:
729
+ - current_page: The current page number
730
+ - per_page: Number of results per page
731
+ - total_entries: Total number of results available
732
+ - total_pages: Total number of pages available
733
+ - has_next_page: Boolean indicating if more pages exist
734
+ - next_page: Next page number (None if no more pages)
735
+ - results: List of lead dictionaries for this page
736
+ """
737
+ logger.info("Entering search_leads_with_apollo_page")
738
+
739
+ if example_url:
740
+ parsed_url = urlparse(example_url)
741
+ query_string = parsed_url.query
742
+
743
+ if not query_string and "?" in parsed_url.fragment:
744
+ fragment_query = parsed_url.fragment.split("?", 1)[1]
745
+ query_string = fragment_query
746
+
747
+ query_params = parse_qs(query_string)
748
+
749
+ dynamic_payload: Dict[str, Any] = {
750
+ "page": page,
751
+ "per_page": per_page,
752
+ }
753
+
754
+ mapping = {
755
+ "personLocations": "person_locations",
756
+ "organizationNumEmployeesRanges": "organization_num_employees_ranges",
757
+ "personTitles": "person_titles",
758
+ "personNotTitles": "person_not_titles",
759
+ "qOrganizationJobTitles": "q_organization_job_titles",
760
+ "sortAscending": "sort_ascending",
761
+ "sortByField": "sort_by_field",
762
+ "contactEmailStatusV2": "contact_email_status",
763
+ "searchSignalIds": "search_signal_ids",
764
+ "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
765
+ "revenueRange[max]": "revenue_range_max",
766
+ "revenueRange[min]": "revenue_range_min",
767
+ "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
768
+ "organizationIndustryTagIds": "organization_industry_tag_ids",
769
+ "notOrganizationIds": "not_organization_ids",
770
+ }
771
+
772
+ for raw_key, raw_value_list in query_params.items():
773
+ if raw_key.endswith("[]"):
774
+ key = raw_key[:-2]
775
+ else:
776
+ key = raw_key
777
+
778
+ if raw_key in mapping:
779
+ key = mapping[raw_key]
780
+ elif key in mapping:
781
+ key = mapping[key]
782
+ else:
783
+ key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()
784
+
785
+ if len(raw_value_list) == 1:
786
+ final_value: Union[str, List[str]] = raw_value_list[0]
787
+ else:
788
+ final_value = raw_value_list
789
+
790
+ if key in ("sort_ascending",):
791
+ val_lower = str(final_value).lower()
792
+ final_value = val_lower in ("true", "1", "yes")
793
+
794
+ if key in ("page", "per_page"):
795
+ try:
796
+ final_value = int(final_value)
797
+ except ValueError:
798
+ pass
799
+
800
+ if key == "q_keywords" and isinstance(final_value, list):
801
+ final_value = " ".join(final_value)
802
+
803
+ if raw_key.endswith("[]"):
804
+ if isinstance(final_value, str):
805
+ final_value = [final_value]
806
+ else:
807
+ if key in (
808
+ "person_locations",
809
+ "person_titles",
810
+ "person_seniorities",
811
+ "organization_locations",
812
+ "q_organization_domains",
813
+ "contact_email_status",
814
+ "organization_ids",
815
+ "organization_num_employees_ranges",
816
+ "person_not_titles",
817
+ "q_organization_job_titles",
818
+ "organization_latest_funding_stage_cd",
819
+ ):
820
+ if isinstance(final_value, str):
821
+ final_value = [final_value]
822
+
823
+ dynamic_payload[key] = final_value
824
+
825
+ if dynamic_payload.get("sort_by_field") == "[none]":
826
+ dynamic_payload.pop("sort_by_field")
827
+
828
+ # -----------------------------------
829
+ # B) No example_url -> build from `query`
830
+ # -----------------------------------
831
+ else:
832
+ dynamic_payload = {
833
+ "person_titles": query.person_current_titles or [],
834
+ "person_locations": query.person_locations or [],
835
+ "search_signal_ids": query.filter_by_signals or [],
836
+ "q_keywords": query.search_keywords or "",
837
+ "organization_num_employees_ranges": (
838
+ query.organization_num_employees_ranges
839
+ or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
840
+ ),
841
+ }
842
+ if query.job_openings_with_titles:
843
+ dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
844
+ if query.latest_funding_stages:
845
+ dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
846
+ if query.sort_by_field is not None:
847
+ dynamic_payload["sort_by_field"] = query.sort_by_field
848
+ if query.sort_ascending is not None:
849
+ dynamic_payload["sort_ascending"] = query.sort_ascending
850
+
851
+ page_payload = dict(dynamic_payload)
852
+ page_payload["page"] = page
853
+ page_payload["per_page"] = per_page
854
+
855
+ print(f"Fetching Apollo page {page} with per_page {per_page}..."
856
+ f" Payload: {json.dumps(page_payload, indent=2)}")
857
+
858
+ # Get the full Apollo API response with pagination metadata
859
+ token, is_oauth = get_apollo_access_token(tool_config)
860
+ headers = {
861
+ "Cache-Control": "no-cache",
862
+ "Content-Type": "application/json",
863
+ }
864
+ if is_oauth:
865
+ headers["Authorization"] = f"Bearer {token}"
866
+ else:
867
+ headers["X-Api-Key"] = token
868
+
869
+ url = "https://api.apollo.io/api/v1/mixed_people/search"
870
+
871
+ async with aiohttp.ClientSession() as session:
872
+ apollo_response = await fetch_apollo_data(session, url, headers, page_payload)
873
+ if not apollo_response:
874
+ return {"current_page": page, "per_page": per_page, "total_entries": 0, "total_pages": 0, "has_next_page": False, "results": []}
875
+
876
+ # Extract pagination metadata
877
+ pagination = apollo_response.get("pagination", {})
878
+ current_page = pagination.get("page", page)
879
+ total_entries = pagination.get("total_entries", 0)
880
+ total_pages = pagination.get("total_pages", 0)
881
+ per_page_actual = pagination.get("per_page", per_page)
882
+
883
+ # Determine if there are more pages
884
+ has_next_page = current_page < total_pages
885
+
886
+ # Extract people and contacts
887
+ people = apollo_response.get("people", [])
888
+ contacts = apollo_response.get("contacts", [])
889
+ page_results = people + contacts
890
+
891
+ leads: List[Dict[str, Any]] = []
892
+ for person_data in page_results:
893
+ input_user_properties: Dict[str, Any] = {}
894
+ additional_props = input_user_properties.get("additional_properties") or {}
895
+ input_user_properties = fill_in_properties_with_preference(input_user_properties, person_data)
896
+ person_data = cleanup_properties(person_data)
897
+ additional_props["apollo_person_data"] = json.dumps(person_data)
898
+ input_user_properties["additional_properties"] = additional_props
899
+ leads.append(input_user_properties)
900
+
901
+ logger.info(f"Converted {len(leads)} Apollo records into dictionaries (single page mode). Page {current_page} of {total_pages}")
902
+
903
+ return {
904
+ "current_page": current_page,
905
+ "per_page": per_page_actual,
906
+ "total_entries": total_entries,
907
+ "total_pages": total_pages,
908
+ "has_next_page": has_next_page,
909
+ "next_page": current_page + 1 if has_next_page else None,
910
+ "results": leads
911
+ }
912
+
685
913
  @assistant_tool
686
914
  async def get_organization_domain_from_apollo(
687
915
  organization_id: str,
@@ -741,22 +969,20 @@ async def get_organization_details_from_apollo(
741
969
  """
742
970
  logger.info("Entering get_organization_details_from_apollo")
743
971
 
744
- APOLLO_API_KEY = get_apollo_access_token(tool_config)
972
+ token, is_oauth = get_apollo_access_token(tool_config)
745
973
  if not organization_id:
746
974
  logger.warning("No organization_id provided.")
747
975
  return {'error': "Organization ID must be provided"}
748
976
 
749
977
  headers = {
750
- "X-Api-Key": APOLLO_API_KEY,
751
978
  "Content-Type": "application/json",
752
979
  "Cache-Control": "no-cache",
753
980
  "Accept": "application/json"
754
981
  }
755
-
756
- cached_response = retrieve_output("get_organization_details_from_apollo", organization_id)
757
- if cached_response is not None:
758
- logger.info(f"Cache hit for organization ID: {organization_id}")
759
- return cached_response
982
+ if is_oauth:
983
+ headers["Authorization"] = f"Bearer {token}"
984
+ else:
985
+ headers["X-Api-Key"] = token
760
986
 
761
987
  url = f'https://api.apollo.io/api/v1/organizations/{organization_id}'
762
988
  logger.debug(f"Making GET request to Apollo for organization ID: {organization_id}")
@@ -769,7 +995,6 @@ async def get_organization_details_from_apollo(
769
995
  result = await response.json()
770
996
  org_details = result.get('organization', {})
771
997
  if org_details:
772
- cache_output("get_organization_details_from_apollo", organization_id, org_details)
773
998
  logger.info("Successfully retrieved organization details from Apollo.")
774
999
  return org_details
775
1000
  else:
@@ -836,7 +1061,7 @@ async def enrich_user_info_with_apollo(
836
1061
  email=email,
837
1062
  tool_config=tool_config
838
1063
  )
839
- except Exception as e:
1064
+ except Exception:
840
1065
  logger.exception("Exception occurred while enriching person info from Apollo by LinkedIn or email.")
841
1066
  else:
842
1067
  # Fallback to name-based lookup
@@ -884,11 +1109,11 @@ async def enrich_user_info_with_apollo(
884
1109
  linkedin_url=linkedin_url,
885
1110
  tool_config=tool_config
886
1111
  )
887
- except Exception as e:
1112
+ except Exception:
888
1113
  logger.exception("Exception occurred during second stage Apollo enrichment.")
889
1114
  if user_data_from_apollo:
890
1115
  break
891
- except Exception as e:
1116
+ except Exception:
892
1117
  logger.exception("Exception occurred while performing name-based lookup in Apollo.")
893
1118
 
894
1119
  if not user_data_from_apollo:
@@ -938,11 +1163,20 @@ async def enrich_user_info_with_apollo(
938
1163
  if not input_user_properties.get("summary_about_lead"):
939
1164
  input_user_properties["summary_about_lead"] = person_data["headline"]
940
1165
 
941
- # Derive location
942
- city = person_data.get("city", "")
943
- state = person_data.get("state", "")
944
- if city or state:
945
- input_user_properties["lead_location"] = f"{city}, {state}".strip(", ")
1166
+ # Derive location (avoid literal "None")
1167
+ city = person_data.get("city")
1168
+ state = person_data.get("state")
1169
+ parts = []
1170
+ for value in (city, state):
1171
+ if value is None:
1172
+ continue
1173
+ s = str(value).strip()
1174
+ if not s or s.lower() == "none":
1175
+ continue
1176
+ parts.append(s)
1177
+ lead_location = ", ".join(parts)
1178
+ if lead_location:
1179
+ input_user_properties["lead_location"] = lead_location
946
1180
 
947
1181
  # Verify name match
948
1182
  first_matched = bool(
@@ -962,3 +1196,389 @@ async def enrich_user_info_with_apollo(
962
1196
  input_user_properties["additional_properties"] = additional_props
963
1197
 
964
1198
  return input_user_properties
1199
+
1200
+
1201
async def search_companies_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Search for companies using Apollo's organizations/search endpoint.

    Issues a single call to ``fetch_apollo_data`` and returns the raw records;
    no pagination handling or field normalization is done here.

    Args:
        tool_config: Apollo API configuration, forwarded to
            ``get_apollo_access_token`` to resolve the credential.
        dynamic_payload: Search parameters for the API call, passed unchanged
            to ``fetch_apollo_data``.

    Returns:
        List of company/organization dictionaries: the response's
        ``organizations`` followed by its ``accounts``. An empty list is
        returned when no payload is given or when no data comes back.
    """
    logger.info("Entering search_companies_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    # The credential is either an OAuth bearer token or a plain API key; the
    # flag returned alongside it decides which header carries it.
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"
    logger.info(f"Sending payload to Apollo organizations endpoint (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        data = await fetch_apollo_data(session, url, headers, dynamic_payload)
        if not data:
            logger.error("No data returned from Apollo organizations search.")
            return []

        # ``dict.get(key, [])`` only falls back when the key is missing; a key
        # that is present with a JSON ``null`` yields None, and ``None + list``
        # raises TypeError. ``or []`` covers both cases.
        organizations = data.get("organizations") or []
        accounts = data.get("accounts") or []  # Apollo sometimes returns accounts as well
        return organizations + accounts
1243
+
1244
+
1245
def fill_in_company_properties(company_data: dict) -> dict:
    """
    Convert Apollo company/organization data into a standardized format.

    Args:
        company_data: Raw company data from Apollo API

    Returns:
        Dictionary with standardized company properties. ``technology_stack``
        is only present when ``technology_names`` is non-empty. The raw record,
        after ``cleanup_properties``, is stored as a JSON string under
        ``additional_properties["apollo_organization_data"]``.
    """
    company_properties = {}

    # Basic company information
    company_properties["organization_name"] = company_data.get("name", "")
    company_properties["primary_domain"] = company_data.get("primary_domain", "")
    company_properties["website_url"] = company_data.get("website_url", "")
    company_properties["organization_linkedin_url"] = company_data.get("linkedin_url", "")

    # Location information
    city = company_data.get("city", "")
    state = company_data.get("state", "")
    country = company_data.get("country", "")
    company_properties["organization_city"] = city
    company_properties["organization_state"] = state
    company_properties["organization_country"] = country

    # Combined location string; falsy parts (missing, None, "") are skipped so
    # the result never contains dangling separators.
    company_properties["organization_location"] = ", ".join(
        [part for part in (city, state, country) if part]
    )

    # Company size and financial info
    company_properties["employee_count"] = company_data.get("estimated_num_employees", 0)
    company_properties["annual_revenue"] = company_data.get("annual_revenue", 0)

    # Industry and business info
    company_properties["industry"] = company_data.get("industry", "")
    # ``dict.get(key, [])`` returns None when the key exists with a null value,
    # and ``", ".join(None)`` raises TypeError. Fall back to an empty list and
    # stringify items so a non-string entry cannot break the join either.
    keywords = company_data.get("keywords") or []
    company_properties["keywords"] = ", ".join(
        str(keyword) for keyword in keywords if keyword is not None
    )
    company_properties["description"] = company_data.get("description", "")

    # Funding and growth
    company_properties["founded_year"] = company_data.get("founded_year", "")
    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
    company_properties["total_funding"] = company_data.get("total_funding", 0)

    # Technology stack (key is omitted entirely when nothing is reported)
    tech_stack = company_data.get("technology_names") or []
    if tech_stack:
        company_properties["technology_stack"] = ", ".join(
            str(tech) for tech in tech_stack if tech is not None
        )

    # Apollo-specific IDs
    company_properties["apollo_organization_id"] = company_data.get("id", "")

    # Additional metadata
    company_properties["phone"] = company_data.get("phone", "")
    company_properties["facebook_url"] = company_data.get("facebook_url", "")
    company_properties["twitter_url"] = company_data.get("twitter_url", "")

    # Store raw data for reference.
    # NOTE(review): cleanup_properties is defined elsewhere in this module;
    # its output is assumed to be JSON-serializable - confirm.
    company_properties["additional_properties"] = {
        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
    }

    return company_properties
1309
+
1310
+
1311
# URL query-parameter names mapped to organizations/search payload keys.
# Parameters not listed here are converted from camelCase to snake_case.
_COMPANY_URL_PARAM_MAPPING = {
    "organizationLocations": "organization_locations",
    "organizationNumEmployeesRanges": "organization_num_employees_ranges",
    "organizationIndustries": "organization_industries",
    "organizationIndustryTagIds": "organization_industry_tag_ids",
    "qKeywords": "q_keywords",
    "qOrganizationDomains": "q_organization_domains",
    "sortAscending": "sort_ascending",
    "sortByField": "sort_by_field",
    "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
    "revenueRange[max]": "revenue_range_max",
    "revenueRange[min]": "revenue_range_min",
    "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
    "organizationIds": "organization_ids",
    "notOrganizationIds": "not_organization_ids",
    "qOrganizationSearchListId": "q_organization_search_list_id",
    "qNotOrganizationSearchListId": "q_not_organization_search_list_id",
}

# Payload keys whose value is wrapped in a list even when the URL supplies a
# single value without the "[]" suffix.
_COMPANY_LIST_PAYLOAD_KEYS = frozenset({
    "organization_locations",
    "organization_industries",
    "organization_industry_tag_ids",
    "q_organization_domains",
    "q_organization_keyword_tags",
    "organization_ids",
    "not_organization_ids",
    "organization_num_employees_ranges",
    "currently_using_any_of_technology_uids",
    "organization_latest_funding_stage_cd",
})

# Payload keys whose URL value is converted to int when possible.
_COMPANY_INT_PAYLOAD_KEYS = frozenset({
    "page",
    "per_page",
    "revenue_range_min",
    "revenue_range_max",
})


def _split_company_keyword_tags(value: Any) -> Any:
    """Split comma-separated keyword tags into a flat list of stripped tags."""
    if isinstance(value, str):
        return [tag.strip() for tag in value.split(",") if tag.strip()]
    if isinstance(value, list):
        flattened = []
        for item in value:
            if isinstance(item, str) and "," in item:
                flattened.extend([tag.strip() for tag in item.split(",") if tag.strip()])
            else:
                flattened.append(item)
        return flattened
    return value


def _company_payload_from_example_url(
    example_url: str,
    page: Optional[int],
    per_page: Optional[int],
) -> Dict[str, Any]:
    """Build an organizations/search payload from the query parameters of a URL."""
    parsed_url = urlparse(example_url)
    query_string = parsed_url.query

    # The filters may sit after a "?" inside the URL fragment rather than in
    # the regular query component; fall back to that when the query is empty.
    if not query_string and "?" in parsed_url.fragment:
        query_string = parsed_url.fragment.split("?", 1)[1]

    payload: Dict[str, Any] = {
        "page": page,
        "per_page": per_page,
    }

    for raw_key, raw_value_list in parse_qs(query_string).items():
        is_array_param = raw_key.endswith("[]")
        bare_key = raw_key[:-2] if is_array_param else raw_key

        # Try the key exactly as written first (covers "revenueRange[max]"),
        # then without the "[]" suffix, then fall back to camelCase -> snake_case.
        if raw_key in _COMPANY_URL_PARAM_MAPPING:
            key = _COMPANY_URL_PARAM_MAPPING[raw_key]
        elif bare_key in _COMPANY_URL_PARAM_MAPPING:
            key = _COMPANY_URL_PARAM_MAPPING[bare_key]
        else:
            key = re.sub(r"(?<!^)(?=[A-Z])", "_", bare_key).lower()

        final_value: Any = raw_value_list[0] if len(raw_value_list) == 1 else raw_value_list

        if key == "sort_ascending":
            final_value = str(final_value).lower() in ("true", "1", "yes")

        if key in _COMPANY_INT_PAYLOAD_KEYS:
            try:
                final_value = int(final_value)
            except (TypeError, ValueError):
                # ValueError: non-numeric text. TypeError: the parameter was
                # repeated, so the value is a list. Keep the raw value either way.
                pass

        if key == "q_organization_keyword_tags":
            final_value = _split_company_keyword_tags(final_value)

        if isinstance(final_value, str) and (is_array_param or key in _COMPANY_LIST_PAYLOAD_KEYS):
            final_value = [final_value]

        payload[key] = final_value

    if payload.get("sort_by_field") == "[none]":
        payload.pop("sort_by_field")

    return payload


def _company_payload_from_query(query: Any) -> Dict[str, Any]:
    """Build an organizations/search payload from a CompanyQueryFilters object."""
    payload: Dict[str, Any] = {}

    # Only add fields if they have values (Apollo doesn't like empty arrays)
    if query.organization_locations:
        payload["organization_locations"] = query.organization_locations
    if query.organization_industries:
        payload["organization_industries"] = query.organization_industries
    if query.organization_industry_tag_ids:
        payload["organization_industry_tag_ids"] = query.organization_industry_tag_ids

    # Explicit ranges win; otherwise derive one "min,max" range, defaulting the
    # missing bound to 1 or 1000.
    employee_ranges = []
    if query.organization_num_employees_ranges:
        employee_ranges = query.organization_num_employees_ranges
    elif query.min_employees or query.max_employees:
        employee_ranges = [f"{query.min_employees or 1},{query.max_employees or 1000}"]
    if employee_ranges:
        payload["organization_num_employees_ranges"] = employee_ranges

    if query.q_keywords:
        # Split comma-separated keywords into an array for company search
        if isinstance(query.q_keywords, str):
            keyword_tags = [tag.strip() for tag in query.q_keywords.split(",") if tag.strip()]
        else:
            keyword_tags = query.q_keywords
        payload["q_organization_keyword_tags"] = keyword_tags
    if query.q_organization_domains:
        payload["q_organization_domains"] = query.q_organization_domains
    if query.revenue_range_min is not None:
        payload["revenue_range_min"] = query.revenue_range_min
    if query.revenue_range_max is not None:
        payload["revenue_range_max"] = query.revenue_range_max
    if query.organization_latest_funding_stage_cd:
        payload["organization_latest_funding_stage_cd"] = query.organization_latest_funding_stage_cd
    if query.currently_using_any_of_technology_uids:
        payload["currently_using_any_of_technology_uids"] = query.currently_using_any_of_technology_uids
    if query.organization_ids:
        payload["organization_ids"] = query.organization_ids
    if query.not_organization_ids:
        payload["not_organization_ids"] = query.not_organization_ids
    if query.q_organization_search_list_id:
        payload["q_organization_search_list_id"] = query.q_organization_search_list_id
    if query.q_not_organization_search_list_id:
        payload["q_not_organization_search_list_id"] = query.q_not_organization_search_list_id

    # query.sort_by_field / query.sort_ascending are deliberately not
    # forwarded: sorting parameters may not be supported by the organizations
    # endpoint, so they are left out of query-built payloads.
    return payload


@assistant_tool
async def search_companies_with_apollo_page(
    query: CompanyQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """
    Fetch a single page of Apollo companies using ``page`` and ``per_page``.

    This helper performs one request to the Apollo API and returns the fetched
    companies along with comprehensive pagination metadata. When
    ``example_url`` is given, the search filters are parsed from its query
    parameters and ``query`` is not read; otherwise they are built from
    ``query``. In both cases the ``page`` and ``per_page`` arguments override
    any values found in the URL.

    Args:
        query: CompanyQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1; None means 1)
        per_page: Number of results per page (defaults to 25; None means 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
            - current_page: The current page number
            - per_page: Number of results per page
            - total_entries: Total number of results available
            - total_pages: Total number of pages available
            - has_next_page: Boolean indicating if more pages exist
            - next_page: Next page number (None if no more pages)
            - results: List of company dictionaries for this page
    """
    logger.info("Entering search_companies_with_apollo_page")

    # The parameters are Optional; never forward an explicit None to the API.
    if page is None:
        page = 1
    if per_page is None:
        per_page = 25

    if example_url:
        dynamic_payload = _company_payload_from_example_url(example_url, page, per_page)
    else:
        dynamic_payload = _company_payload_from_query(query)

    # Clean up the payload - remove empty arrays and None values that Apollo doesn't like
    cleaned_payload = {
        key: value
        for key, value in dynamic_payload.items()
        if value is not None and not (isinstance(value, list) and not value)
    }

    # The caller's page and per_page always win over anything parsed from the URL
    cleaned_payload["page"] = page
    cleaned_payload["per_page"] = per_page

    logger.info(f"Fetching Apollo companies page {page} with per_page {per_page}..."
                f" Payload: {json.dumps(cleaned_payload, indent=2)}")

    # The credential is either an OAuth bearer token or a plain API key
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"

    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, cleaned_payload)
        if not apollo_response:
            # Same keys as the success path, so callers can read "next_page"
            # without guarding against KeyError.
            return {
                "current_page": page,
                "per_page": per_page,
                "total_entries": 0,
                "total_pages": 0,
                "has_next_page": False,
                "next_page": None,
                "results": []
            }

        # Extract pagination metadata. ``or`` fallbacks also cover keys that
        # are present with a null value, which a ``.get`` default does not.
        pagination = apollo_response.get("pagination") or {}
        current_page = pagination.get("page") or page
        total_entries = pagination.get("total_entries") or 0
        total_pages = pagination.get("total_pages") or 0
        per_page_actual = pagination.get("per_page") or per_page

        # Determine if there are more pages
        has_next_page = current_page < total_pages

        # Extract organizations and accounts
        organizations = apollo_response.get("organizations") or []
        accounts = apollo_response.get("accounts") or []
        page_results = organizations + accounts

        companies: List[Dict[str, Any]] = [
            fill_in_company_properties(company_data) for company_data in page_results
        ]

        logger.info(f"Converted {len(companies)} Apollo company records into standardized dictionaries (single page mode). Page {current_page} of {total_pages}")

        return {
            "current_page": current_page,
            "per_page": per_page_actual,
            "total_entries": total_entries,
            "total_pages": total_pages,
            "has_next_page": has_next_page,
            "next_page": current_page + 1 if has_next_page else None,
            "results": companies
        }