dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +33 -0
- dhisana/schemas/sales.py +224 -23
- dhisana/utils/add_mapping.py +72 -63
- dhisana/utils/apollo_tools.py +739 -109
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/cache_output_tools.py +23 -23
- dhisana/utils/check_email_validity_tools.py +456 -458
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +3 -1
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +585 -85
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +69 -16
- dhisana/utils/generate_email_response.py +298 -41
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +19 -6
- dhisana/utils/generate_linkedin_response_message.py +156 -65
- dhisana/utils/generate_structured_output_internal.py +351 -131
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +391 -25
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +771 -167
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +19 -16
- dhisana/utils/parse_linkedin_messages_txt.py +2 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +507 -206
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +121 -68
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +363 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +576 -0
- dhisana/utils/test_connect.py +1765 -92
- dhisana/utils/trasform_json.py +95 -16
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/apollo_tools.py
CHANGED
|
@@ -1,19 +1,15 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import hashlib
|
|
3
2
|
import json
|
|
4
3
|
import logging
|
|
5
4
|
import os
|
|
6
5
|
import re
|
|
7
6
|
import aiohttp
|
|
8
7
|
import backoff
|
|
9
|
-
from datetime import datetime, timedelta
|
|
10
8
|
|
|
11
|
-
from
|
|
12
|
-
from dhisana.schemas.sales import LeadsQueryFilters, SmartList, SmartListLead
|
|
13
|
-
from dhisana.utils.cache_output_tools import cache_output, retrieve_output
|
|
9
|
+
from dhisana.schemas.sales import LeadsQueryFilters, CompanyQueryFilters
|
|
14
10
|
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
15
11
|
from urllib.parse import urlparse, parse_qs
|
|
16
|
-
from typing import Any, Dict, List, Optional, Union
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
17
13
|
|
|
18
14
|
from dhisana.utils.clean_properties import cleanup_properties
|
|
19
15
|
|
|
@@ -21,48 +17,81 @@ logging.basicConfig(level=logging.INFO)
|
|
|
21
17
|
logger = logging.getLogger(__name__)
|
|
22
18
|
|
|
23
19
|
|
|
24
|
-
def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> str:
|
|
20
|
+
def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> Tuple[str, bool]:
|
|
25
21
|
"""
|
|
26
|
-
Retrieves
|
|
22
|
+
Retrieves an Apollo access token from tool configuration or environment variables.
|
|
27
23
|
|
|
28
24
|
Args:
|
|
29
|
-
tool_config (list):
|
|
30
|
-
Each dictionary should have a "name" key and a "configuration" key,
|
|
31
|
-
where "configuration" is a list of dictionaries containing "name" and "value" keys.
|
|
25
|
+
tool_config (list): Optional tool configuration payload provided to the tool.
|
|
32
26
|
|
|
33
27
|
Returns:
|
|
34
|
-
str:
|
|
28
|
+
Tuple[str, bool]: A tuple containing the token string and a boolean flag indicating
|
|
29
|
+
whether the token represents an OAuth bearer token (``True``) or an API key (``False``).
|
|
35
30
|
|
|
36
31
|
Raises:
|
|
37
|
-
ValueError: If the
|
|
32
|
+
ValueError: If the Apollo integration has not been configured.
|
|
38
33
|
"""
|
|
39
|
-
|
|
34
|
+
token: Optional[str] = None
|
|
35
|
+
is_oauth = False
|
|
40
36
|
|
|
41
37
|
if tool_config:
|
|
42
|
-
logger.debug(f"Tool config provided: {tool_config}")
|
|
43
38
|
apollo_config = next(
|
|
44
39
|
(item for item in tool_config if item.get("name") == "apollo"), None
|
|
45
40
|
)
|
|
46
41
|
if apollo_config:
|
|
47
42
|
config_map = {
|
|
48
|
-
item["name"]: item
|
|
43
|
+
item["name"]: item.get("value")
|
|
49
44
|
for item in apollo_config.get("configuration", [])
|
|
50
45
|
if item
|
|
51
46
|
}
|
|
52
|
-
|
|
47
|
+
|
|
48
|
+
raw_oauth = config_map.get("oauth_tokens")
|
|
49
|
+
if isinstance(raw_oauth, str):
|
|
50
|
+
try:
|
|
51
|
+
raw_oauth = json.loads(raw_oauth)
|
|
52
|
+
except Exception:
|
|
53
|
+
raw_oauth = None
|
|
54
|
+
if isinstance(raw_oauth, dict):
|
|
55
|
+
token = (
|
|
56
|
+
raw_oauth.get("access_token")
|
|
57
|
+
or raw_oauth.get("token")
|
|
58
|
+
)
|
|
59
|
+
if token:
|
|
60
|
+
is_oauth = True
|
|
61
|
+
|
|
62
|
+
if not token:
|
|
63
|
+
direct_access_token = config_map.get("access_token")
|
|
64
|
+
if direct_access_token:
|
|
65
|
+
token = direct_access_token
|
|
66
|
+
is_oauth = True
|
|
67
|
+
|
|
68
|
+
if not token:
|
|
69
|
+
api_key = config_map.get("apiKey") or config_map.get("api_key")
|
|
70
|
+
if api_key:
|
|
71
|
+
token = api_key
|
|
72
|
+
is_oauth = False
|
|
53
73
|
else:
|
|
54
74
|
logger.warning("No 'apollo' config item found in tool_config.")
|
|
55
|
-
else:
|
|
56
|
-
logger.debug("No tool_config provided or it's None.")
|
|
57
75
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
76
|
+
if not token:
|
|
77
|
+
env_oauth_token = os.getenv("APOLLO_ACCESS_TOKEN")
|
|
78
|
+
if env_oauth_token:
|
|
79
|
+
token = env_oauth_token
|
|
80
|
+
is_oauth = True
|
|
81
|
+
|
|
82
|
+
if not token:
|
|
83
|
+
env_api_key = os.getenv("APOLLO_API_KEY")
|
|
84
|
+
if env_api_key:
|
|
85
|
+
token = env_api_key
|
|
86
|
+
is_oauth = False
|
|
87
|
+
|
|
88
|
+
if not token:
|
|
89
|
+
logger.error("Apollo integration is not configured.")
|
|
90
|
+
raise ValueError(
|
|
91
|
+
"Apollo integration is not configured. Please configure the connection to Apollo in Integrations."
|
|
92
|
+
)
|
|
64
93
|
|
|
65
|
-
return
|
|
94
|
+
return token, is_oauth
|
|
66
95
|
|
|
67
96
|
|
|
68
97
|
@assistant_tool
|
|
@@ -77,6 +106,7 @@ async def enrich_person_info_from_apollo(
|
|
|
77
106
|
linkedin_url: Optional[str] = None,
|
|
78
107
|
email: Optional[str] = None,
|
|
79
108
|
phone: Optional[str] = None,
|
|
109
|
+
fetch_valid_phone_number: Optional[bool] = False,
|
|
80
110
|
tool_config: Optional[List[Dict]] = None,
|
|
81
111
|
) -> Dict[str, Any]:
|
|
82
112
|
"""
|
|
@@ -86,37 +116,40 @@ async def enrich_person_info_from_apollo(
|
|
|
86
116
|
- **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
|
|
87
117
|
- **email** (*str*, optional): Email address of the person.
|
|
88
118
|
- **phone** (*str*, optional): Phone number of the person.
|
|
119
|
+
- **fetch_valid_phone_number** (*bool*, optional): If True, include phone numbers in the API response. Defaults to False.
|
|
89
120
|
|
|
90
121
|
Returns:
|
|
91
122
|
- **dict**: JSON response containing person information.
|
|
92
123
|
"""
|
|
93
124
|
logger.info("Entering enrich_person_info_from_apollo")
|
|
94
125
|
|
|
95
|
-
|
|
126
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
96
127
|
|
|
97
128
|
if not linkedin_url and not email and not phone:
|
|
98
129
|
logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
|
|
99
130
|
return {'error': "At least one of linkedin_url, email, or phone must be provided"}
|
|
100
131
|
|
|
101
|
-
headers = {
|
|
102
|
-
|
|
103
|
-
"
|
|
104
|
-
|
|
132
|
+
headers = {"Content-Type": "application/json"}
|
|
133
|
+
if is_oauth:
|
|
134
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
135
|
+
else:
|
|
136
|
+
headers["X-Api-Key"] = token
|
|
105
137
|
|
|
106
138
|
data = {}
|
|
107
139
|
if linkedin_url:
|
|
108
140
|
logger.debug(f"LinkedIn URL provided: {linkedin_url}")
|
|
109
141
|
data['linkedin_url'] = linkedin_url
|
|
110
|
-
cached_response = retrieve_output("enrich_person_info_from_apollo", linkedin_url)
|
|
111
|
-
if cached_response is not None:
|
|
112
|
-
logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
|
|
113
|
-
return cached_response
|
|
114
142
|
if email:
|
|
115
143
|
logger.debug(f"Email provided: {email}")
|
|
116
144
|
data['email'] = email
|
|
117
145
|
if phone:
|
|
118
146
|
logger.debug(f"Phone provided: {phone}")
|
|
119
147
|
data['phone_numbers'] = [phone] # Apollo expects a list for phone numbers
|
|
148
|
+
|
|
149
|
+
# Add reveal_phone_number parameter if fetch_valid_phone_number is True
|
|
150
|
+
if fetch_valid_phone_number:
|
|
151
|
+
logger.debug("fetch_valid_phone_number flag is True, including phone numbers in API response")
|
|
152
|
+
data['reveal_phone_number'] = True
|
|
120
153
|
|
|
121
154
|
url = 'https://api.apollo.io/api/v1/people/match'
|
|
122
155
|
|
|
@@ -126,8 +159,6 @@ async def enrich_person_info_from_apollo(
|
|
|
126
159
|
logger.debug(f"Received response status: {response.status}")
|
|
127
160
|
if response.status == 200:
|
|
128
161
|
result = await response.json()
|
|
129
|
-
if linkedin_url:
|
|
130
|
-
cache_output("enrich_person_info_from_apollo", linkedin_url, result)
|
|
131
162
|
logger.info("Successfully retrieved person info from Apollo.")
|
|
132
163
|
return result
|
|
133
164
|
elif response.status == 429:
|
|
@@ -179,11 +210,12 @@ async def lookup_person_in_apollo_by_name(
|
|
|
179
210
|
logger.warning("No full_name provided.")
|
|
180
211
|
return {'error': "Full name is required"}
|
|
181
212
|
|
|
182
|
-
|
|
183
|
-
headers = {
|
|
184
|
-
|
|
185
|
-
"
|
|
186
|
-
|
|
213
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
214
|
+
headers = {"Content-Type": "application/json"}
|
|
215
|
+
if is_oauth:
|
|
216
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
217
|
+
else:
|
|
218
|
+
headers["X-Api-Key"] = token
|
|
187
219
|
|
|
188
220
|
# Construct the query payload
|
|
189
221
|
data = {
|
|
@@ -222,7 +254,6 @@ async def lookup_person_in_apollo_by_name(
|
|
|
222
254
|
logger.exception("Exception occurred while looking up person by name.")
|
|
223
255
|
return {'error': str(e)}
|
|
224
256
|
|
|
225
|
-
|
|
226
257
|
@assistant_tool
|
|
227
258
|
@backoff.on_exception(
|
|
228
259
|
backoff.expo,
|
|
@@ -246,23 +277,21 @@ async def enrich_organization_info_from_apollo(
|
|
|
246
277
|
"""
|
|
247
278
|
logger.info("Entering enrich_organization_info_from_apollo")
|
|
248
279
|
|
|
249
|
-
|
|
280
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
250
281
|
|
|
251
282
|
if not organization_domain:
|
|
252
283
|
logger.warning("No organization domain provided.")
|
|
253
284
|
return {'error': "organization domain must be provided"}
|
|
254
285
|
|
|
255
286
|
headers = {
|
|
256
|
-
"X-Api-Key": f"{APOLLO_API_KEY}",
|
|
257
287
|
"Content-Type": "application/json",
|
|
258
288
|
"Cache-Control": "no-cache",
|
|
259
289
|
"accept": "application/json"
|
|
260
290
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
return cached_response
|
|
291
|
+
if is_oauth:
|
|
292
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
293
|
+
else:
|
|
294
|
+
headers["X-Api-Key"] = token
|
|
266
295
|
|
|
267
296
|
url = f'https://api.apollo.io/api/v1/organizations/enrich?domain={organization_domain}'
|
|
268
297
|
logger.debug(f"Making GET request to Apollo for organization domain: {organization_domain}")
|
|
@@ -273,7 +302,6 @@ async def enrich_organization_info_from_apollo(
|
|
|
273
302
|
logger.debug(f"Received response status: {response.status}")
|
|
274
303
|
if response.status == 200:
|
|
275
304
|
result = await response.json()
|
|
276
|
-
cache_output("enrich_organization_info_from_apollo", organization_domain, result)
|
|
277
305
|
logger.info("Successfully retrieved organization info from Apollo.")
|
|
278
306
|
return result
|
|
279
307
|
elif response.status == 429:
|
|
@@ -305,22 +333,12 @@ async def enrich_organization_info_from_apollo(
|
|
|
305
333
|
)
|
|
306
334
|
async def fetch_apollo_data(session, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
307
335
|
logger.info("Entering fetch_apollo_data")
|
|
308
|
-
|
|
309
|
-
key_hash = hashlib.sha256(key_data.encode()).hexdigest()
|
|
310
|
-
logger.debug(f"Cache key hash: {key_hash}")
|
|
311
|
-
|
|
312
|
-
cached_response = retrieve_output("fetch_apollo_data", key_hash)
|
|
313
|
-
if cached_response is not None:
|
|
314
|
-
logger.info("Cache hit for fetch_apollo_data.")
|
|
315
|
-
return cached_response
|
|
316
|
-
|
|
317
|
-
logger.debug("No cache hit. Making POST request to Apollo.")
|
|
336
|
+
logger.debug("Making POST request to Apollo.")
|
|
318
337
|
async with session.post(url, headers=headers, json=payload) as response:
|
|
319
338
|
logger.debug(f"Received response status: {response.status}")
|
|
320
339
|
if response.status == 200:
|
|
321
340
|
result = await response.json()
|
|
322
|
-
|
|
323
|
-
logger.info("Successfully fetched data from Apollo and cached it.")
|
|
341
|
+
logger.info("Successfully fetched data from Apollo.")
|
|
324
342
|
return result
|
|
325
343
|
elif response.status == 429:
|
|
326
344
|
msg = "Rate limit exceeded"
|
|
@@ -347,12 +365,15 @@ async def search_people_with_apollo(
|
|
|
347
365
|
logger.warning("No payload given; returning empty result.")
|
|
348
366
|
return []
|
|
349
367
|
|
|
350
|
-
|
|
368
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
351
369
|
headers = {
|
|
352
370
|
"Cache-Control": "no-cache",
|
|
353
371
|
"Content-Type": "application/json",
|
|
354
|
-
"X-Api-Key": api_key,
|
|
355
372
|
}
|
|
373
|
+
if is_oauth:
|
|
374
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
375
|
+
else:
|
|
376
|
+
headers["X-Api-Key"] = token
|
|
356
377
|
|
|
357
378
|
url = "https://api.apollo.io/api/v1/mixed_people/search"
|
|
358
379
|
logger.info(f"Sending payload to Apollo (single page): {json.dumps(dynamic_payload, indent=2)}")
|
|
@@ -378,16 +399,6 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
378
399
|
"""Returns True if the value is None, empty string, or only whitespace."""
|
|
379
400
|
return value is None or (isinstance(value, str) and not value.strip())
|
|
380
401
|
|
|
381
|
-
# Email
|
|
382
|
-
if is_empty(input_user_properties.get("email")):
|
|
383
|
-
input_user_properties["email"] = person_data.get("email", "")
|
|
384
|
-
|
|
385
|
-
# Phone
|
|
386
|
-
if is_empty(input_user_properties.get("phone")):
|
|
387
|
-
# person_data["contact"] might not be defined, so we chain get calls
|
|
388
|
-
input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
|
|
389
|
-
.get("sanitized_phone", ""))
|
|
390
|
-
|
|
391
402
|
# Full name
|
|
392
403
|
# Because `person_data.get("name")` has precedence over input_user_properties,
|
|
393
404
|
# we only update it if input_user_properties is empty/None for "full_name".
|
|
@@ -402,6 +413,16 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
402
413
|
if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
|
|
403
414
|
input_user_properties["last_name"] = person_data["last_name"]
|
|
404
415
|
|
|
416
|
+
# Email
|
|
417
|
+
if is_empty(input_user_properties.get("email")):
|
|
418
|
+
input_user_properties["email"] = person_data.get("email", "")
|
|
419
|
+
|
|
420
|
+
# Phone
|
|
421
|
+
if is_empty(input_user_properties.get("phone")):
|
|
422
|
+
# person_data["contact"] might not be defined, so we chain get calls
|
|
423
|
+
input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
|
|
424
|
+
.get("sanitized_phone", ""))
|
|
425
|
+
|
|
405
426
|
# LinkedIn URL
|
|
406
427
|
if is_empty(input_user_properties.get("user_linkedin_url")) and person_data.get("linkedin_url"):
|
|
407
428
|
input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]
|
|
@@ -441,11 +462,19 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
441
462
|
if is_empty(input_user_properties.get("summary_about_lead")) and person_data.get("headline"):
|
|
442
463
|
input_user_properties["summary_about_lead"] = person_data["headline"]
|
|
443
464
|
|
|
444
|
-
# City/State -> lead_location
|
|
445
|
-
city = person_data.get("city"
|
|
446
|
-
state = person_data.get("state"
|
|
447
|
-
|
|
448
|
-
|
|
465
|
+
# City/State -> lead_location (avoid literal "None")
|
|
466
|
+
city = person_data.get("city")
|
|
467
|
+
state = person_data.get("state")
|
|
468
|
+
parts = []
|
|
469
|
+
for value in (city, state):
|
|
470
|
+
if value is None:
|
|
471
|
+
continue
|
|
472
|
+
s = str(value).strip()
|
|
473
|
+
if not s or s.lower() == "none":
|
|
474
|
+
continue
|
|
475
|
+
parts.append(s)
|
|
476
|
+
lead_location = ", ".join(parts) if parts else None
|
|
477
|
+
if is_empty(input_user_properties.get("lead_location")) and lead_location:
|
|
449
478
|
input_user_properties["lead_location"] = lead_location
|
|
450
479
|
|
|
451
480
|
# Filter out placeholder emails
|
|
@@ -457,13 +486,13 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
457
486
|
|
|
458
487
|
async def search_leads_with_apollo(
|
|
459
488
|
query: LeadsQueryFilters,
|
|
460
|
-
|
|
489
|
+
max_items_to_search: Optional[int] = 10,
|
|
461
490
|
example_url: Optional[str] = None,
|
|
462
491
|
tool_config: Optional[List[Dict[str, Any]]] = None,
|
|
463
|
-
) -> List[
|
|
492
|
+
) -> List[Dict]:
|
|
464
493
|
logger.info("Entering search_leads_with_apollo")
|
|
465
494
|
|
|
466
|
-
max_items =
|
|
495
|
+
max_items = max_items_to_search or 10
|
|
467
496
|
if max_items > 2500:
|
|
468
497
|
logger.warning("Requested max_items_to_search > 2000, overriding to 2000.")
|
|
469
498
|
max_items = 2500
|
|
@@ -509,7 +538,7 @@ async def search_leads_with_apollo(
|
|
|
509
538
|
# Important: handle personNotTitles as well
|
|
510
539
|
"personNotTitles": "person_not_titles",
|
|
511
540
|
|
|
512
|
-
"qOrganizationJobTitles": "
|
|
541
|
+
"qOrganizationJobTitles": "q_organization_job_titles",
|
|
513
542
|
"sortAscending": "sort_ascending",
|
|
514
543
|
"sortByField": "sort_by_field",
|
|
515
544
|
"contactEmailStatusV2": "contact_email_status",
|
|
@@ -582,6 +611,8 @@ async def search_leads_with_apollo(
|
|
|
582
611
|
"organization_ids",
|
|
583
612
|
"organization_num_employees_ranges",
|
|
584
613
|
"person_not_titles", # <--- added so single item is forced into list
|
|
614
|
+
"q_organization_job_titles",
|
|
615
|
+
"organization_latest_funding_stage_cd",
|
|
585
616
|
):
|
|
586
617
|
if isinstance(final_value, str):
|
|
587
618
|
final_value = [final_value]
|
|
@@ -602,7 +633,8 @@ async def search_leads_with_apollo(
|
|
|
602
633
|
dynamic_payload = {
|
|
603
634
|
"person_titles": query.person_current_titles or [],
|
|
604
635
|
"person_locations": query.person_locations or [],
|
|
605
|
-
"search_signal_ids": query.
|
|
636
|
+
"search_signal_ids": query.filter_by_signals or [],
|
|
637
|
+
"q_keywords": query.search_keywords or "",
|
|
606
638
|
"organization_num_employees_ranges": (
|
|
607
639
|
query.organization_num_employees_ranges
|
|
608
640
|
or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
|
|
@@ -610,6 +642,10 @@ async def search_leads_with_apollo(
|
|
|
610
642
|
"page": 1,
|
|
611
643
|
"per_page": min(max_items, 100),
|
|
612
644
|
}
|
|
645
|
+
if query.job_openings_with_titles:
|
|
646
|
+
dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
|
|
647
|
+
if query.latest_funding_stages:
|
|
648
|
+
dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
|
|
613
649
|
if query.sort_by_field is not None:
|
|
614
650
|
dynamic_payload["sort_by_field"] = query.sort_by_field
|
|
615
651
|
if query.sort_ascending is not None:
|
|
@@ -647,9 +683,9 @@ async def search_leads_with_apollo(
|
|
|
647
683
|
logger.info(f"Fetched a total of {len(all_people)} items from Apollo (across pages).")
|
|
648
684
|
|
|
649
685
|
# -----------------------------------------------
|
|
650
|
-
# Convert raw results ->
|
|
686
|
+
# Convert raw results -> dictionary objects
|
|
651
687
|
# -----------------------------------------------
|
|
652
|
-
leads: List[
|
|
688
|
+
leads: List[Dict[str, Any]] = []
|
|
653
689
|
for user_data_from_apollo in all_people:
|
|
654
690
|
person_data = user_data_from_apollo
|
|
655
691
|
|
|
@@ -663,15 +699,217 @@ async def search_leads_with_apollo(
|
|
|
663
699
|
additional_props["apollo_person_data"] = json.dumps(person_data)
|
|
664
700
|
input_user_properties["additional_properties"] = additional_props
|
|
665
701
|
|
|
666
|
-
|
|
667
|
-
lead.agent_instance_id = request.agent_instance_id
|
|
668
|
-
lead.smart_list_id = request.id
|
|
669
|
-
lead.organization_id = request.organization_id
|
|
670
|
-
leads.append(lead)
|
|
702
|
+
leads.append(input_user_properties)
|
|
671
703
|
|
|
672
|
-
logger.info(f"Converted {len(leads)} Apollo records into
|
|
704
|
+
logger.info(f"Converted {len(leads)} Apollo records into dictionaries.")
|
|
673
705
|
return leads
|
|
674
706
|
|
|
707
|
+
|
|
708
|
+
async def search_leads_with_apollo_page(
|
|
709
|
+
query: LeadsQueryFilters,
|
|
710
|
+
page: Optional[int] = 1,
|
|
711
|
+
per_page: Optional[int] = 25,
|
|
712
|
+
example_url: Optional[str] = None,
|
|
713
|
+
tool_config: Optional[List[Dict[str, Any]]] = None,
|
|
714
|
+
) -> Dict[str, Any]:
|
|
715
|
+
"""Fetch a single page of Apollo leads using ``page`` and ``per_page``.
|
|
716
|
+
|
|
717
|
+
This helper performs one request to the Apollo API and returns the fetched
|
|
718
|
+
leads along with comprehensive pagination metadata.
|
|
719
|
+
|
|
720
|
+
Args:
|
|
721
|
+
query: LeadsQueryFilters object containing search criteria
|
|
722
|
+
page: Page number to fetch (1-indexed, defaults to 1)
|
|
723
|
+
per_page: Number of results per page (defaults to 25)
|
|
724
|
+
example_url: Optional URL to parse search parameters from
|
|
725
|
+
tool_config: Optional tool configuration for API keys
|
|
726
|
+
|
|
727
|
+
Returns:
|
|
728
|
+
Dict containing:
|
|
729
|
+
- current_page: The current page number
|
|
730
|
+
- per_page: Number of results per page
|
|
731
|
+
- total_entries: Total number of results available
|
|
732
|
+
- total_pages: Total number of pages available
|
|
733
|
+
- has_next_page: Boolean indicating if more pages exist
|
|
734
|
+
- next_page: Next page number (None if no more pages)
|
|
735
|
+
- results: List of lead dictionaries for this page
|
|
736
|
+
"""
|
|
737
|
+
logger.info("Entering search_leads_with_apollo_page")
|
|
738
|
+
|
|
739
|
+
if example_url:
|
|
740
|
+
parsed_url = urlparse(example_url)
|
|
741
|
+
query_string = parsed_url.query
|
|
742
|
+
|
|
743
|
+
if not query_string and "?" in parsed_url.fragment:
|
|
744
|
+
fragment_query = parsed_url.fragment.split("?", 1)[1]
|
|
745
|
+
query_string = fragment_query
|
|
746
|
+
|
|
747
|
+
query_params = parse_qs(query_string)
|
|
748
|
+
|
|
749
|
+
dynamic_payload: Dict[str, Any] = {
|
|
750
|
+
"page": page,
|
|
751
|
+
"per_page": per_page,
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
mapping = {
|
|
755
|
+
"personLocations": "person_locations",
|
|
756
|
+
"organizationNumEmployeesRanges": "organization_num_employees_ranges",
|
|
757
|
+
"personTitles": "person_titles",
|
|
758
|
+
"personNotTitles": "person_not_titles",
|
|
759
|
+
"qOrganizationJobTitles": "q_organization_job_titles",
|
|
760
|
+
"sortAscending": "sort_ascending",
|
|
761
|
+
"sortByField": "sort_by_field",
|
|
762
|
+
"contactEmailStatusV2": "contact_email_status",
|
|
763
|
+
"searchSignalIds": "search_signal_ids",
|
|
764
|
+
"organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
|
|
765
|
+
"revenueRange[max]": "revenue_range_max",
|
|
766
|
+
"revenueRange[min]": "revenue_range_min",
|
|
767
|
+
"currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
|
|
768
|
+
"organizationIndustryTagIds": "organization_industry_tag_ids",
|
|
769
|
+
"notOrganizationIds": "not_organization_ids",
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
for raw_key, raw_value_list in query_params.items():
|
|
773
|
+
if raw_key.endswith("[]"):
|
|
774
|
+
key = raw_key[:-2]
|
|
775
|
+
else:
|
|
776
|
+
key = raw_key
|
|
777
|
+
|
|
778
|
+
if raw_key in mapping:
|
|
779
|
+
key = mapping[raw_key]
|
|
780
|
+
elif key in mapping:
|
|
781
|
+
key = mapping[key]
|
|
782
|
+
else:
|
|
783
|
+
key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()
|
|
784
|
+
|
|
785
|
+
if len(raw_value_list) == 1:
|
|
786
|
+
final_value: Union[str, List[str]] = raw_value_list[0]
|
|
787
|
+
else:
|
|
788
|
+
final_value = raw_value_list
|
|
789
|
+
|
|
790
|
+
if key in ("sort_ascending",):
|
|
791
|
+
val_lower = str(final_value).lower()
|
|
792
|
+
final_value = val_lower in ("true", "1", "yes")
|
|
793
|
+
|
|
794
|
+
if key in ("page", "per_page"):
|
|
795
|
+
try:
|
|
796
|
+
final_value = int(final_value)
|
|
797
|
+
except ValueError:
|
|
798
|
+
pass
|
|
799
|
+
|
|
800
|
+
if key == "q_keywords" and isinstance(final_value, list):
|
|
801
|
+
final_value = " ".join(final_value)
|
|
802
|
+
|
|
803
|
+
if raw_key.endswith("[]"):
|
|
804
|
+
if isinstance(final_value, str):
|
|
805
|
+
final_value = [final_value]
|
|
806
|
+
else:
|
|
807
|
+
if key in (
|
|
808
|
+
"person_locations",
|
|
809
|
+
"person_titles",
|
|
810
|
+
"person_seniorities",
|
|
811
|
+
"organization_locations",
|
|
812
|
+
"q_organization_domains",
|
|
813
|
+
"contact_email_status",
|
|
814
|
+
"organization_ids",
|
|
815
|
+
"organization_num_employees_ranges",
|
|
816
|
+
"person_not_titles",
|
|
817
|
+
"q_organization_job_titles",
|
|
818
|
+
"organization_latest_funding_stage_cd",
|
|
819
|
+
):
|
|
820
|
+
if isinstance(final_value, str):
|
|
821
|
+
final_value = [final_value]
|
|
822
|
+
|
|
823
|
+
dynamic_payload[key] = final_value
|
|
824
|
+
|
|
825
|
+
if dynamic_payload.get("sort_by_field") == "[none]":
|
|
826
|
+
dynamic_payload.pop("sort_by_field")
|
|
827
|
+
|
|
828
|
+
# -----------------------------------
|
|
829
|
+
# B) No example_url -> build from `query`
|
|
830
|
+
# -----------------------------------
|
|
831
|
+
else:
|
|
832
|
+
dynamic_payload = {
|
|
833
|
+
"person_titles": query.person_current_titles or [],
|
|
834
|
+
"person_locations": query.person_locations or [],
|
|
835
|
+
"search_signal_ids": query.filter_by_signals or [],
|
|
836
|
+
"q_keywords": query.search_keywords or "",
|
|
837
|
+
"organization_num_employees_ranges": (
|
|
838
|
+
query.organization_num_employees_ranges
|
|
839
|
+
or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
|
|
840
|
+
),
|
|
841
|
+
}
|
|
842
|
+
if query.job_openings_with_titles:
|
|
843
|
+
dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
|
|
844
|
+
if query.latest_funding_stages:
|
|
845
|
+
dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
|
|
846
|
+
if query.sort_by_field is not None:
|
|
847
|
+
dynamic_payload["sort_by_field"] = query.sort_by_field
|
|
848
|
+
if query.sort_ascending is not None:
|
|
849
|
+
dynamic_payload["sort_ascending"] = query.sort_ascending
|
|
850
|
+
|
|
851
|
+
page_payload = dict(dynamic_payload)
|
|
852
|
+
page_payload["page"] = page
|
|
853
|
+
page_payload["per_page"] = per_page
|
|
854
|
+
|
|
855
|
+
print(f"Fetching Apollo page {page} with per_page {per_page}..."
|
|
856
|
+
f" Payload: {json.dumps(page_payload, indent=2)}")
|
|
857
|
+
|
|
858
|
+
# Get the full Apollo API response with pagination metadata
|
|
859
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
860
|
+
headers = {
|
|
861
|
+
"Cache-Control": "no-cache",
|
|
862
|
+
"Content-Type": "application/json",
|
|
863
|
+
}
|
|
864
|
+
if is_oauth:
|
|
865
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
866
|
+
else:
|
|
867
|
+
headers["X-Api-Key"] = token
|
|
868
|
+
|
|
869
|
+
url = "https://api.apollo.io/api/v1/mixed_people/search"
|
|
870
|
+
|
|
871
|
+
async with aiohttp.ClientSession() as session:
|
|
872
|
+
apollo_response = await fetch_apollo_data(session, url, headers, page_payload)
|
|
873
|
+
if not apollo_response:
|
|
874
|
+
return {"current_page": page, "per_page": per_page, "total_entries": 0, "total_pages": 0, "has_next_page": False, "results": []}
|
|
875
|
+
|
|
876
|
+
# Extract pagination metadata
|
|
877
|
+
pagination = apollo_response.get("pagination", {})
|
|
878
|
+
current_page = pagination.get("page", page)
|
|
879
|
+
total_entries = pagination.get("total_entries", 0)
|
|
880
|
+
total_pages = pagination.get("total_pages", 0)
|
|
881
|
+
per_page_actual = pagination.get("per_page", per_page)
|
|
882
|
+
|
|
883
|
+
# Determine if there are more pages
|
|
884
|
+
has_next_page = current_page < total_pages
|
|
885
|
+
|
|
886
|
+
# Extract people and contacts
|
|
887
|
+
people = apollo_response.get("people", [])
|
|
888
|
+
contacts = apollo_response.get("contacts", [])
|
|
889
|
+
page_results = people + contacts
|
|
890
|
+
|
|
891
|
+
leads: List[Dict[str, Any]] = []
|
|
892
|
+
for person_data in page_results:
|
|
893
|
+
input_user_properties: Dict[str, Any] = {}
|
|
894
|
+
additional_props = input_user_properties.get("additional_properties") or {}
|
|
895
|
+
input_user_properties = fill_in_properties_with_preference(input_user_properties, person_data)
|
|
896
|
+
person_data = cleanup_properties(person_data)
|
|
897
|
+
additional_props["apollo_person_data"] = json.dumps(person_data)
|
|
898
|
+
input_user_properties["additional_properties"] = additional_props
|
|
899
|
+
leads.append(input_user_properties)
|
|
900
|
+
|
|
901
|
+
logger.info(f"Converted {len(leads)} Apollo records into dictionaries (single page mode). Page {current_page} of {total_pages}")
|
|
902
|
+
|
|
903
|
+
return {
|
|
904
|
+
"current_page": current_page,
|
|
905
|
+
"per_page": per_page_actual,
|
|
906
|
+
"total_entries": total_entries,
|
|
907
|
+
"total_pages": total_pages,
|
|
908
|
+
"has_next_page": has_next_page,
|
|
909
|
+
"next_page": current_page + 1 if has_next_page else None,
|
|
910
|
+
"results": leads
|
|
911
|
+
}
|
|
912
|
+
|
|
675
913
|
@assistant_tool
|
|
676
914
|
async def get_organization_domain_from_apollo(
|
|
677
915
|
organization_id: str,
|
|
@@ -731,22 +969,20 @@ async def get_organization_details_from_apollo(
|
|
|
731
969
|
"""
|
|
732
970
|
logger.info("Entering get_organization_details_from_apollo")
|
|
733
971
|
|
|
734
|
-
|
|
972
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
735
973
|
if not organization_id:
|
|
736
974
|
logger.warning("No organization_id provided.")
|
|
737
975
|
return {'error': "Organization ID must be provided"}
|
|
738
976
|
|
|
739
977
|
headers = {
|
|
740
|
-
"X-Api-Key": APOLLO_API_KEY,
|
|
741
978
|
"Content-Type": "application/json",
|
|
742
979
|
"Cache-Control": "no-cache",
|
|
743
980
|
"Accept": "application/json"
|
|
744
981
|
}
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
return cached_response
|
|
982
|
+
if is_oauth:
|
|
983
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
984
|
+
else:
|
|
985
|
+
headers["X-Api-Key"] = token
|
|
750
986
|
|
|
751
987
|
url = f'https://api.apollo.io/api/v1/organizations/{organization_id}'
|
|
752
988
|
logger.debug(f"Making GET request to Apollo for organization ID: {organization_id}")
|
|
@@ -759,7 +995,6 @@ async def get_organization_details_from_apollo(
|
|
|
759
995
|
result = await response.json()
|
|
760
996
|
org_details = result.get('organization', {})
|
|
761
997
|
if org_details:
|
|
762
|
-
cache_output("get_organization_details_from_apollo", organization_id, org_details)
|
|
763
998
|
logger.info("Successfully retrieved organization details from Apollo.")
|
|
764
999
|
return org_details
|
|
765
1000
|
else:
|
|
@@ -826,7 +1061,7 @@ async def enrich_user_info_with_apollo(
|
|
|
826
1061
|
email=email,
|
|
827
1062
|
tool_config=tool_config
|
|
828
1063
|
)
|
|
829
|
-
except Exception
|
|
1064
|
+
except Exception:
|
|
830
1065
|
logger.exception("Exception occurred while enriching person info from Apollo by LinkedIn or email.")
|
|
831
1066
|
else:
|
|
832
1067
|
# Fallback to name-based lookup
|
|
@@ -874,11 +1109,11 @@ async def enrich_user_info_with_apollo(
|
|
|
874
1109
|
linkedin_url=linkedin_url,
|
|
875
1110
|
tool_config=tool_config
|
|
876
1111
|
)
|
|
877
|
-
except Exception
|
|
1112
|
+
except Exception:
|
|
878
1113
|
logger.exception("Exception occurred during second stage Apollo enrichment.")
|
|
879
1114
|
if user_data_from_apollo:
|
|
880
1115
|
break
|
|
881
|
-
except Exception
|
|
1116
|
+
except Exception:
|
|
882
1117
|
logger.exception("Exception occurred while performing name-based lookup in Apollo.")
|
|
883
1118
|
|
|
884
1119
|
if not user_data_from_apollo:
|
|
@@ -928,11 +1163,20 @@ async def enrich_user_info_with_apollo(
|
|
|
928
1163
|
if not input_user_properties.get("summary_about_lead"):
|
|
929
1164
|
input_user_properties["summary_about_lead"] = person_data["headline"]
|
|
930
1165
|
|
|
931
|
-
# Derive location
|
|
932
|
-
city = person_data.get("city"
|
|
933
|
-
state = person_data.get("state"
|
|
934
|
-
|
|
935
|
-
|
|
1166
|
+
# Derive location (avoid literal "None")
|
|
1167
|
+
city = person_data.get("city")
|
|
1168
|
+
state = person_data.get("state")
|
|
1169
|
+
parts = []
|
|
1170
|
+
for value in (city, state):
|
|
1171
|
+
if value is None:
|
|
1172
|
+
continue
|
|
1173
|
+
s = str(value).strip()
|
|
1174
|
+
if not s or s.lower() == "none":
|
|
1175
|
+
continue
|
|
1176
|
+
parts.append(s)
|
|
1177
|
+
lead_location = ", ".join(parts)
|
|
1178
|
+
if lead_location:
|
|
1179
|
+
input_user_properties["lead_location"] = lead_location
|
|
936
1180
|
|
|
937
1181
|
# Verify name match
|
|
938
1182
|
first_matched = bool(
|
|
@@ -952,3 +1196,389 @@ async def enrich_user_info_with_apollo(
|
|
|
952
1196
|
input_user_properties["additional_properties"] = additional_props
|
|
953
1197
|
|
|
954
1198
|
return input_user_properties
|
|
1199
|
+
|
|
1200
|
+
|
|
1201
|
+
async def search_companies_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Search for companies using Apollo's organizations/search endpoint.

    Issues a single request through ``fetch_apollo_data`` and returns the raw
    records from the response without any normalization.

    Args:
        tool_config: Apollo API configuration, forwarded to
            ``get_apollo_access_token`` to resolve the credential.
        dynamic_payload: Search parameters sent as the request payload.

    Returns:
        The response's ``organizations`` entries followed by its ``accounts``
        entries. An empty list is returned when no payload is given or when
        no data comes back.
    """
    logger.info("Entering search_companies_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    # OAuth tokens go in a bearer header; plain API keys use X-Api-Key.
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"
    logger.info(f"Sending payload to Apollo organizations endpoint (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        data = await fetch_apollo_data(session, url, headers, dynamic_payload)

    if not data:
        logger.error("No data returned from Apollo organizations search.")
        return []

    # ``.get(key, [])`` only covers a missing key; an explicit JSON null would
    # come back as None and break the concatenation, hence the ``or []``.
    organizations = data.get("organizations") or []
    accounts = data.get("accounts") or []  # Apollo sometimes returns accounts as well
    return organizations + accounts
|
|
1243
|
+
|
|
1244
|
+
|
|
1245
|
+
def fill_in_company_properties(company_data: dict) -> dict:
    """
    Convert Apollo company/organization data into a standardized format.

    Args:
        company_data: Raw company data from Apollo API

    Returns:
        Dictionary with standardized company properties. ``technology_stack``
        is only present when ``technology_names`` is non-empty. The raw record
        (after ``cleanup_properties``) is stored as a JSON string under
        ``additional_properties["apollo_organization_data"]``.
    """
    city = company_data.get("city", "")
    state = company_data.get("state", "")
    country = company_data.get("country", "")

    company_properties = {
        # Basic company information
        "organization_name": company_data.get("name", ""),
        "primary_domain": company_data.get("primary_domain", ""),
        "website_url": company_data.get("website_url", ""),
        "organization_linkedin_url": company_data.get("linkedin_url", ""),
        # Location information
        "organization_city": city,
        "organization_state": state,
        "organization_country": country,
        # Combined location string; empty/None parts are skipped
        "organization_location": ", ".join([part for part in (city, state, country) if part]),
        # Company size and financial info
        "employee_count": company_data.get("estimated_num_employees", 0),
        "annual_revenue": company_data.get("annual_revenue", 0),
        # Industry and business info
        "industry": company_data.get("industry", ""),
    }

    # ``keywords`` may be present but null; ``.get(key, [])`` would then hand
    # None to ``join``. Items are stringified so a stray non-string value does
    # not raise either.
    keywords = company_data.get("keywords") or []
    company_properties["keywords"] = ", ".join(str(keyword) for keyword in keywords)
    company_properties["description"] = company_data.get("description", "")

    # Funding and growth
    company_properties["founded_year"] = company_data.get("founded_year", "")
    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
    company_properties["total_funding"] = company_data.get("total_funding", 0)

    # Technology stack (key omitted entirely when there is nothing to report)
    tech_stack = company_data.get("technology_names", [])
    if tech_stack:
        company_properties["technology_stack"] = ", ".join(tech_stack)

    # Apollo-specific IDs
    company_properties["apollo_organization_id"] = company_data.get("id", "")

    # Additional metadata
    company_properties["phone"] = company_data.get("phone", "")
    company_properties["facebook_url"] = company_data.get("facebook_url", "")
    company_properties["twitter_url"] = company_data.get("twitter_url", "")

    # Store raw data for reference
    company_properties["additional_properties"] = {
        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
    }

    return company_properties
|
|
1309
|
+
|
|
1310
|
+
|
|
1311
|
+
# Apollo URL query-string parameter -> organizations/search payload key.
_APOLLO_COMPANY_URL_PARAM_MAP = {
    "organizationLocations": "organization_locations",
    "organizationNumEmployeesRanges": "organization_num_employees_ranges",
    "organizationIndustries": "organization_industries",
    "organizationIndustryTagIds": "organization_industry_tag_ids",
    "qKeywords": "q_keywords",
    "qOrganizationDomains": "q_organization_domains",
    "sortAscending": "sort_ascending",
    "sortByField": "sort_by_field",
    "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
    "revenueRange[max]": "revenue_range_max",
    "revenueRange[min]": "revenue_range_min",
    "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
    "organizationIds": "organization_ids",
    "notOrganizationIds": "not_organization_ids",
    "qOrganizationSearchListId": "q_organization_search_list_id",
    "qNotOrganizationSearchListId": "q_not_organization_search_list_id",
}

# Payload keys that are always sent as lists, even for a single URL value.
_APOLLO_COMPANY_LIST_KEYS = frozenset({
    "organization_locations",
    "organization_industries",
    "organization_industry_tag_ids",
    "q_organization_domains",
    "q_organization_keyword_tags",
    "organization_ids",
    "not_organization_ids",
    "organization_num_employees_ranges",
    "currently_using_any_of_technology_uids",
    "organization_latest_funding_stage_cd",
})

# Payload keys whose URL value is converted to int when possible.
_APOLLO_COMPANY_INT_KEYS = frozenset({"page", "per_page", "revenue_range_min", "revenue_range_max"})


def _split_company_keyword_tags(value: Any) -> Any:
    """Split comma-separated keyword tags (string or list of strings) into a flat list."""
    if isinstance(value, str):
        return [tag.strip() for tag in value.split(",") if tag.strip()]
    if isinstance(value, list):
        flattened: List[Any] = []
        for item in value:
            if isinstance(item, str) and "," in item:
                flattened.extend(tag.strip() for tag in item.split(",") if tag.strip())
            else:
                flattened.append(item)
        return flattened
    return value


def _company_payload_from_url(example_url: str) -> Dict[str, Any]:
    """Translate the query parameters of an example URL into an organizations/search payload."""
    parsed_url = urlparse(example_url)
    query_string = parsed_url.query
    # Some URLs carry their query string inside the fragment ("#...?a=b").
    if not query_string and "?" in parsed_url.fragment:
        query_string = parsed_url.fragment.split("?", 1)[1]

    payload: Dict[str, Any] = {}
    for raw_key, raw_values in parse_qs(query_string).items():
        is_array_param = raw_key.endswith("[]")
        base_key = raw_key[:-2] if is_array_param else raw_key

        # Explicit mapping wins (tried with and without the "[]" suffix);
        # anything else is converted from camelCase to snake_case.
        if raw_key in _APOLLO_COMPANY_URL_PARAM_MAP:
            key = _APOLLO_COMPANY_URL_PARAM_MAP[raw_key]
        elif base_key in _APOLLO_COMPANY_URL_PARAM_MAP:
            key = _APOLLO_COMPANY_URL_PARAM_MAP[base_key]
        else:
            key = re.sub(r"(?<!^)(?=[A-Z])", "_", base_key).lower()

        final_value: Any = raw_values[0] if len(raw_values) == 1 else raw_values

        if key in _APOLLO_COMPANY_INT_KEYS:
            try:
                final_value = int(final_value)
            except (TypeError, ValueError):
                # Non-numeric text, or a repeated parameter (list): keep as-is.
                pass

        if key == "q_organization_keyword_tags":
            final_value = _split_company_keyword_tags(final_value)

        # "[]"-suffixed parameters and known list fields must be lists.
        if isinstance(final_value, str) and (is_array_param or key in _APOLLO_COMPANY_LIST_KEYS):
            final_value = [final_value]

        payload[key] = final_value

    return payload


def _company_payload_from_query(query: CompanyQueryFilters) -> Dict[str, Any]:
    """Build an organizations/search payload from the populated fields of ``query``."""
    payload: Dict[str, Any] = {}

    # Only add fields if they have values (Apollo doesn't like empty arrays)
    for field in ("organization_locations", "organization_industries", "organization_industry_tag_ids"):
        value = getattr(query, field)
        if value:
            payload[field] = value

    # Explicit ranges take precedence over the min/max shorthand, which
    # defaults the missing bound to 1 / 1000.
    if query.organization_num_employees_ranges:
        payload["organization_num_employees_ranges"] = query.organization_num_employees_ranges
    elif query.min_employees or query.max_employees:
        payload["organization_num_employees_ranges"] = [
            f"{query.min_employees or 1},{query.max_employees or 1000}"
        ]

    if query.q_keywords:
        # Split comma-separated keywords into an array for company search
        if isinstance(query.q_keywords, str):
            keyword_tags = [tag.strip() for tag in query.q_keywords.split(",") if tag.strip()]
        else:
            keyword_tags = query.q_keywords
        payload["q_organization_keyword_tags"] = keyword_tags

    if query.q_organization_domains:
        payload["q_organization_domains"] = query.q_organization_domains
    # Revenue bounds are kept when 0, so test against None rather than truthiness.
    if query.revenue_range_min is not None:
        payload["revenue_range_min"] = query.revenue_range_min
    if query.revenue_range_max is not None:
        payload["revenue_range_max"] = query.revenue_range_max

    for field in (
        "organization_latest_funding_stage_cd",
        "currently_using_any_of_technology_uids",
        "organization_ids",
        "not_organization_ids",
        "q_organization_search_list_id",
        "q_not_organization_search_list_id",
    ):
        value = getattr(query, field)
        if value:
            payload[field] = value

    return payload


@assistant_tool
async def search_companies_with_apollo_page(
    query: CompanyQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """
    Fetch a single page of Apollo companies using ``page`` and ``per_page``.

    This helper performs one request to the Apollo API and returns the fetched
    companies along with comprehensive pagination metadata. When
    ``example_url`` is given, the search parameters are taken from that URL
    and ``query`` is ignored; ``page`` and ``per_page`` always come from the
    arguments, never from the URL.

    Args:
        query: CompanyQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1)
        per_page: Number of results per page (defaults to 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
            - current_page: The current page number
            - per_page: Number of results per page
            - total_entries: Total number of results available
            - total_pages: Total number of pages available
            - has_next_page: Boolean indicating if more pages exist
            - next_page: Next page number (None if no more pages)
            - results: List of company dictionaries for this page
    """
    logger.info("Entering search_companies_with_apollo_page")

    if example_url:
        dynamic_payload = _company_payload_from_url(example_url)
    else:
        dynamic_payload = _company_payload_from_query(query)

    # Remove sorting parameters that may not be supported by organizations endpoint
    # NOTE(review): sorting is stripped for both the URL-derived and the
    # query-derived payload; confirm the URL path is not expected to forward
    # sort parameters.
    dynamic_payload.pop("sort_by_field", None)
    dynamic_payload.pop("sort_ascending", None)

    # Clean up the payload - remove empty arrays and None values that Apollo doesn't like
    cleaned_payload = {
        key: value
        for key, value in dynamic_payload.items()
        if value is not None and not (isinstance(value, list) and not value)
    }

    # Ensure page and per_page are always included (and override URL values)
    cleaned_payload["page"] = page
    cleaned_payload["per_page"] = per_page

    logger.info(
        f"Fetching Apollo companies page {page} with per_page {per_page}..."
        f" Payload: {json.dumps(cleaned_payload, indent=2)}"
    )

    # Get the full Apollo API response with pagination metadata
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"

    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, cleaned_payload)

    if not apollo_response:
        return {
            "current_page": page,
            "per_page": per_page,
            "total_entries": 0,
            "total_pages": 0,
            "has_next_page": False,
            "next_page": None,
            "results": [],
        }

    # Extract pagination metadata; ``or`` guards against explicit nulls, which
    # ``.get(key, default)`` would pass through unchanged.
    pagination = apollo_response.get("pagination") or {}
    current_page = pagination.get("page") or page
    total_entries = pagination.get("total_entries") or 0
    total_pages = pagination.get("total_pages") or 0
    per_page_actual = pagination.get("per_page") or per_page

    # Determine if there are more pages
    has_next_page = current_page is not None and current_page < total_pages

    # Extract organizations and accounts
    organizations = apollo_response.get("organizations") or []
    accounts = apollo_response.get("accounts") or []
    companies: List[Dict[str, Any]] = [
        fill_in_company_properties(company_data) for company_data in organizations + accounts
    ]

    logger.info(f"Converted {len(companies)} Apollo company records into standardized dictionaries (single page mode). Page {current_page} of {total_pages}")

    return {
        "current_page": current_page,
        "per_page": per_page_actual,
        "total_entries": total_entries,
        "total_pages": total_pages,
        "has_next_page": has_next_page,
        "next_page": current_page + 1 if has_next_page else None,
        "results": companies,
    }
|