dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +10 -1
- dhisana/schemas/sales.py +203 -22
- dhisana/utils/add_mapping.py +0 -2
- dhisana/utils/apollo_tools.py +739 -119
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/check_email_validity_tools.py +35 -18
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +1 -4
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +174 -35
- dhisana/utils/enrich_lead_information.py +183 -53
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +68 -23
- dhisana/utils/generate_email_response.py +294 -46
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +9 -2
- dhisana/utils/generate_linkedin_response_message.py +137 -66
- dhisana/utils/generate_structured_output_internal.py +317 -164
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +278 -54
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +718 -272
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +8 -6
- dhisana/utils/parse_linkedin_messages_txt.py +1 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +377 -76
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +3 -3
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +360 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +178 -18
- dhisana/utils/test_connect.py +1603 -130
- dhisana/utils/trasform_json.py +3 -3
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/apollo_tools.py
CHANGED
|
@@ -1,19 +1,15 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
import hashlib
|
|
3
2
|
import json
|
|
4
3
|
import logging
|
|
5
4
|
import os
|
|
6
5
|
import re
|
|
7
6
|
import aiohttp
|
|
8
7
|
import backoff
|
|
9
|
-
from datetime import datetime, timedelta
|
|
10
8
|
|
|
11
|
-
from
|
|
12
|
-
from dhisana.schemas.sales import LeadsQueryFilters, SmartList, SmartListLead
|
|
13
|
-
from dhisana.utils.cache_output_tools import cache_output, retrieve_output
|
|
9
|
+
from dhisana.schemas.sales import LeadsQueryFilters, CompanyQueryFilters
|
|
14
10
|
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
15
11
|
from urllib.parse import urlparse, parse_qs
|
|
16
|
-
from typing import Any, Dict, List, Optional, Union
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
17
13
|
|
|
18
14
|
from dhisana.utils.clean_properties import cleanup_properties
|
|
19
15
|
|
|
@@ -21,48 +17,81 @@ logging.basicConfig(level=logging.INFO)
|
|
|
21
17
|
logger = logging.getLogger(__name__)
|
|
22
18
|
|
|
23
19
|
|
|
24
|
-
def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> str:
|
|
20
|
+
def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> Tuple[str, bool]:
|
|
25
21
|
"""
|
|
26
|
-
Retrieves
|
|
22
|
+
Retrieves an Apollo access token from tool configuration or environment variables.
|
|
27
23
|
|
|
28
24
|
Args:
|
|
29
|
-
tool_config (list):
|
|
30
|
-
Each dictionary should have a "name" key and a "configuration" key,
|
|
31
|
-
where "configuration" is a list of dictionaries containing "name" and "value" keys.
|
|
25
|
+
tool_config (list): Optional tool configuration payload provided to the tool.
|
|
32
26
|
|
|
33
27
|
Returns:
|
|
34
|
-
str:
|
|
28
|
+
Tuple[str, bool]: A tuple containing the token string and a boolean flag indicating
|
|
29
|
+
whether the token represents an OAuth bearer token (``True``) or an API key (``False``).
|
|
35
30
|
|
|
36
31
|
Raises:
|
|
37
|
-
ValueError: If the
|
|
32
|
+
ValueError: If the Apollo integration has not been configured.
|
|
38
33
|
"""
|
|
39
|
-
|
|
34
|
+
token: Optional[str] = None
|
|
35
|
+
is_oauth = False
|
|
40
36
|
|
|
41
37
|
if tool_config:
|
|
42
|
-
logger.debug(f"Tool config provided: {tool_config}")
|
|
43
38
|
apollo_config = next(
|
|
44
39
|
(item for item in tool_config if item.get("name") == "apollo"), None
|
|
45
40
|
)
|
|
46
41
|
if apollo_config:
|
|
47
42
|
config_map = {
|
|
48
|
-
item["name"]: item
|
|
43
|
+
item["name"]: item.get("value")
|
|
49
44
|
for item in apollo_config.get("configuration", [])
|
|
50
45
|
if item
|
|
51
46
|
}
|
|
52
|
-
|
|
47
|
+
|
|
48
|
+
raw_oauth = config_map.get("oauth_tokens")
|
|
49
|
+
if isinstance(raw_oauth, str):
|
|
50
|
+
try:
|
|
51
|
+
raw_oauth = json.loads(raw_oauth)
|
|
52
|
+
except Exception:
|
|
53
|
+
raw_oauth = None
|
|
54
|
+
if isinstance(raw_oauth, dict):
|
|
55
|
+
token = (
|
|
56
|
+
raw_oauth.get("access_token")
|
|
57
|
+
or raw_oauth.get("token")
|
|
58
|
+
)
|
|
59
|
+
if token:
|
|
60
|
+
is_oauth = True
|
|
61
|
+
|
|
62
|
+
if not token:
|
|
63
|
+
direct_access_token = config_map.get("access_token")
|
|
64
|
+
if direct_access_token:
|
|
65
|
+
token = direct_access_token
|
|
66
|
+
is_oauth = True
|
|
67
|
+
|
|
68
|
+
if not token:
|
|
69
|
+
api_key = config_map.get("apiKey") or config_map.get("api_key")
|
|
70
|
+
if api_key:
|
|
71
|
+
token = api_key
|
|
72
|
+
is_oauth = False
|
|
53
73
|
else:
|
|
54
74
|
logger.warning("No 'apollo' config item found in tool_config.")
|
|
55
|
-
else:
|
|
56
|
-
logger.debug("No tool_config provided or it's None.")
|
|
57
75
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
76
|
+
if not token:
|
|
77
|
+
env_oauth_token = os.getenv("APOLLO_ACCESS_TOKEN")
|
|
78
|
+
if env_oauth_token:
|
|
79
|
+
token = env_oauth_token
|
|
80
|
+
is_oauth = True
|
|
81
|
+
|
|
82
|
+
if not token:
|
|
83
|
+
env_api_key = os.getenv("APOLLO_API_KEY")
|
|
84
|
+
if env_api_key:
|
|
85
|
+
token = env_api_key
|
|
86
|
+
is_oauth = False
|
|
87
|
+
|
|
88
|
+
if not token:
|
|
89
|
+
logger.error("Apollo integration is not configured.")
|
|
90
|
+
raise ValueError(
|
|
91
|
+
"Apollo integration is not configured. Please configure the connection to Apollo in Integrations."
|
|
92
|
+
)
|
|
64
93
|
|
|
65
|
-
return
|
|
94
|
+
return token, is_oauth
|
|
66
95
|
|
|
67
96
|
|
|
68
97
|
@assistant_tool
|
|
@@ -77,6 +106,7 @@ async def enrich_person_info_from_apollo(
|
|
|
77
106
|
linkedin_url: Optional[str] = None,
|
|
78
107
|
email: Optional[str] = None,
|
|
79
108
|
phone: Optional[str] = None,
|
|
109
|
+
fetch_valid_phone_number: Optional[bool] = False,
|
|
80
110
|
tool_config: Optional[List[Dict]] = None,
|
|
81
111
|
) -> Dict[str, Any]:
|
|
82
112
|
"""
|
|
@@ -86,37 +116,40 @@ async def enrich_person_info_from_apollo(
|
|
|
86
116
|
- **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
|
|
87
117
|
- **email** (*str*, optional): Email address of the person.
|
|
88
118
|
- **phone** (*str*, optional): Phone number of the person.
|
|
119
|
+
- **fetch_valid_phone_number** (*bool*, optional): If True, include phone numbers in the API response. Defaults to False.
|
|
89
120
|
|
|
90
121
|
Returns:
|
|
91
122
|
- **dict**: JSON response containing person information.
|
|
92
123
|
"""
|
|
93
124
|
logger.info("Entering enrich_person_info_from_apollo")
|
|
94
125
|
|
|
95
|
-
|
|
126
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
96
127
|
|
|
97
128
|
if not linkedin_url and not email and not phone:
|
|
98
129
|
logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
|
|
99
130
|
return {'error': "At least one of linkedin_url, email, or phone must be provided"}
|
|
100
131
|
|
|
101
|
-
headers = {
|
|
102
|
-
|
|
103
|
-
"
|
|
104
|
-
|
|
132
|
+
headers = {"Content-Type": "application/json"}
|
|
133
|
+
if is_oauth:
|
|
134
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
135
|
+
else:
|
|
136
|
+
headers["X-Api-Key"] = token
|
|
105
137
|
|
|
106
138
|
data = {}
|
|
107
139
|
if linkedin_url:
|
|
108
140
|
logger.debug(f"LinkedIn URL provided: {linkedin_url}")
|
|
109
141
|
data['linkedin_url'] = linkedin_url
|
|
110
|
-
cached_response = retrieve_output("enrich_person_info_from_apollo", linkedin_url)
|
|
111
|
-
if cached_response is not None:
|
|
112
|
-
logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
|
|
113
|
-
return cached_response
|
|
114
142
|
if email:
|
|
115
143
|
logger.debug(f"Email provided: {email}")
|
|
116
144
|
data['email'] = email
|
|
117
145
|
if phone:
|
|
118
146
|
logger.debug(f"Phone provided: {phone}")
|
|
119
147
|
data['phone_numbers'] = [phone] # Apollo expects a list for phone numbers
|
|
148
|
+
|
|
149
|
+
# Add reveal_phone_number parameter if fetch_valid_phone_number is True
|
|
150
|
+
if fetch_valid_phone_number:
|
|
151
|
+
logger.debug("fetch_valid_phone_number flag is True, including phone numbers in API response")
|
|
152
|
+
data['reveal_phone_number'] = True
|
|
120
153
|
|
|
121
154
|
url = 'https://api.apollo.io/api/v1/people/match'
|
|
122
155
|
|
|
@@ -126,8 +159,6 @@ async def enrich_person_info_from_apollo(
|
|
|
126
159
|
logger.debug(f"Received response status: {response.status}")
|
|
127
160
|
if response.status == 200:
|
|
128
161
|
result = await response.json()
|
|
129
|
-
if linkedin_url:
|
|
130
|
-
cache_output("enrich_person_info_from_apollo", linkedin_url, result)
|
|
131
162
|
logger.info("Successfully retrieved person info from Apollo.")
|
|
132
163
|
return result
|
|
133
164
|
elif response.status == 429:
|
|
@@ -179,11 +210,12 @@ async def lookup_person_in_apollo_by_name(
|
|
|
179
210
|
logger.warning("No full_name provided.")
|
|
180
211
|
return {'error': "Full name is required"}
|
|
181
212
|
|
|
182
|
-
|
|
183
|
-
headers = {
|
|
184
|
-
|
|
185
|
-
"
|
|
186
|
-
|
|
213
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
214
|
+
headers = {"Content-Type": "application/json"}
|
|
215
|
+
if is_oauth:
|
|
216
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
217
|
+
else:
|
|
218
|
+
headers["X-Api-Key"] = token
|
|
187
219
|
|
|
188
220
|
# Construct the query payload
|
|
189
221
|
data = {
|
|
@@ -192,16 +224,6 @@ async def lookup_person_in_apollo_by_name(
|
|
|
192
224
|
"per_page": 10
|
|
193
225
|
}
|
|
194
226
|
|
|
195
|
-
# Build a cache key that includes full_name and company_name (if provided)
|
|
196
|
-
# so that results are correctly cached and retrieved.
|
|
197
|
-
key_item = f"lookup_person_in_apollo_by_name_{full_name}_{company_name or ''}".lower()
|
|
198
|
-
|
|
199
|
-
# Attempt to retrieve a cached response first
|
|
200
|
-
cached_response = retrieve_output("lookup_person_in_apollo_by_name", key_item)
|
|
201
|
-
if cached_response is not None:
|
|
202
|
-
logger.info(f"Cache hit for user: {full_name}, company: {company_name or ''}")
|
|
203
|
-
return cached_response
|
|
204
|
-
|
|
205
227
|
url = 'https://api.apollo.io/api/v1/mixed_people/search'
|
|
206
228
|
logger.debug(f"Making request to Apollo with payload: {data}")
|
|
207
229
|
|
|
@@ -212,7 +234,6 @@ async def lookup_person_in_apollo_by_name(
|
|
|
212
234
|
if response.status == 200:
|
|
213
235
|
result = await response.json()
|
|
214
236
|
logger.info("Successfully looked up person by name on Apollo.")
|
|
215
|
-
cache_output("lookup_person_in_apollo_by_name", key_item, result)
|
|
216
237
|
return result
|
|
217
238
|
elif response.status == 429:
|
|
218
239
|
msg = "Rate limit exceeded"
|
|
@@ -256,23 +277,21 @@ async def enrich_organization_info_from_apollo(
|
|
|
256
277
|
"""
|
|
257
278
|
logger.info("Entering enrich_organization_info_from_apollo")
|
|
258
279
|
|
|
259
|
-
|
|
280
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
260
281
|
|
|
261
282
|
if not organization_domain:
|
|
262
283
|
logger.warning("No organization domain provided.")
|
|
263
284
|
return {'error': "organization domain must be provided"}
|
|
264
285
|
|
|
265
286
|
headers = {
|
|
266
|
-
"X-Api-Key": f"{APOLLO_API_KEY}",
|
|
267
287
|
"Content-Type": "application/json",
|
|
268
288
|
"Cache-Control": "no-cache",
|
|
269
289
|
"accept": "application/json"
|
|
270
290
|
}
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
return cached_response
|
|
291
|
+
if is_oauth:
|
|
292
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
293
|
+
else:
|
|
294
|
+
headers["X-Api-Key"] = token
|
|
276
295
|
|
|
277
296
|
url = f'https://api.apollo.io/api/v1/organizations/enrich?domain={organization_domain}'
|
|
278
297
|
logger.debug(f"Making GET request to Apollo for organization domain: {organization_domain}")
|
|
@@ -283,7 +302,6 @@ async def enrich_organization_info_from_apollo(
|
|
|
283
302
|
logger.debug(f"Received response status: {response.status}")
|
|
284
303
|
if response.status == 200:
|
|
285
304
|
result = await response.json()
|
|
286
|
-
cache_output("enrich_organization_info_from_apollo", organization_domain, result)
|
|
287
305
|
logger.info("Successfully retrieved organization info from Apollo.")
|
|
288
306
|
return result
|
|
289
307
|
elif response.status == 429:
|
|
@@ -315,22 +333,12 @@ async def enrich_organization_info_from_apollo(
|
|
|
315
333
|
)
|
|
316
334
|
async def fetch_apollo_data(session, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
317
335
|
logger.info("Entering fetch_apollo_data")
|
|
318
|
-
|
|
319
|
-
key_hash = hashlib.sha256(key_data.encode()).hexdigest()
|
|
320
|
-
logger.debug(f"Cache key hash: {key_hash}")
|
|
321
|
-
|
|
322
|
-
cached_response = retrieve_output("fetch_apollo_data", key_hash)
|
|
323
|
-
if cached_response is not None:
|
|
324
|
-
logger.info("Cache hit for fetch_apollo_data.")
|
|
325
|
-
return cached_response
|
|
326
|
-
|
|
327
|
-
logger.debug("No cache hit. Making POST request to Apollo.")
|
|
336
|
+
logger.debug("Making POST request to Apollo.")
|
|
328
337
|
async with session.post(url, headers=headers, json=payload) as response:
|
|
329
338
|
logger.debug(f"Received response status: {response.status}")
|
|
330
339
|
if response.status == 200:
|
|
331
340
|
result = await response.json()
|
|
332
|
-
|
|
333
|
-
logger.info("Successfully fetched data from Apollo and cached it.")
|
|
341
|
+
logger.info("Successfully fetched data from Apollo.")
|
|
334
342
|
return result
|
|
335
343
|
elif response.status == 429:
|
|
336
344
|
msg = "Rate limit exceeded"
|
|
@@ -357,12 +365,15 @@ async def search_people_with_apollo(
|
|
|
357
365
|
logger.warning("No payload given; returning empty result.")
|
|
358
366
|
return []
|
|
359
367
|
|
|
360
|
-
|
|
368
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
361
369
|
headers = {
|
|
362
370
|
"Cache-Control": "no-cache",
|
|
363
371
|
"Content-Type": "application/json",
|
|
364
|
-
"X-Api-Key": api_key,
|
|
365
372
|
}
|
|
373
|
+
if is_oauth:
|
|
374
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
375
|
+
else:
|
|
376
|
+
headers["X-Api-Key"] = token
|
|
366
377
|
|
|
367
378
|
url = "https://api.apollo.io/api/v1/mixed_people/search"
|
|
368
379
|
logger.info(f"Sending payload to Apollo (single page): {json.dumps(dynamic_payload, indent=2)}")
|
|
@@ -388,16 +399,6 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
388
399
|
"""Returns True if the value is None, empty string, or only whitespace."""
|
|
389
400
|
return value is None or (isinstance(value, str) and not value.strip())
|
|
390
401
|
|
|
391
|
-
# Email
|
|
392
|
-
if is_empty(input_user_properties.get("email")):
|
|
393
|
-
input_user_properties["email"] = person_data.get("email", "")
|
|
394
|
-
|
|
395
|
-
# Phone
|
|
396
|
-
if is_empty(input_user_properties.get("phone")):
|
|
397
|
-
# person_data["contact"] might not be defined, so we chain get calls
|
|
398
|
-
input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
|
|
399
|
-
.get("sanitized_phone", ""))
|
|
400
|
-
|
|
401
402
|
# Full name
|
|
402
403
|
# Because `person_data.get("name")` has precedence over input_user_properties,
|
|
403
404
|
# we only update it if input_user_properties is empty/None for "full_name".
|
|
@@ -412,6 +413,16 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
412
413
|
if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
|
|
413
414
|
input_user_properties["last_name"] = person_data["last_name"]
|
|
414
415
|
|
|
416
|
+
# Email
|
|
417
|
+
if is_empty(input_user_properties.get("email")):
|
|
418
|
+
input_user_properties["email"] = person_data.get("email", "")
|
|
419
|
+
|
|
420
|
+
# Phone
|
|
421
|
+
if is_empty(input_user_properties.get("phone")):
|
|
422
|
+
# person_data["contact"] might not be defined, so we chain get calls
|
|
423
|
+
input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
|
|
424
|
+
.get("sanitized_phone", ""))
|
|
425
|
+
|
|
415
426
|
# LinkedIn URL
|
|
416
427
|
if is_empty(input_user_properties.get("user_linkedin_url")) and person_data.get("linkedin_url"):
|
|
417
428
|
input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]
|
|
@@ -451,11 +462,19 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
451
462
|
if is_empty(input_user_properties.get("summary_about_lead")) and person_data.get("headline"):
|
|
452
463
|
input_user_properties["summary_about_lead"] = person_data["headline"]
|
|
453
464
|
|
|
454
|
-
# City/State -> lead_location
|
|
455
|
-
city = person_data.get("city"
|
|
456
|
-
state = person_data.get("state"
|
|
457
|
-
|
|
458
|
-
|
|
465
|
+
# City/State -> lead_location (avoid literal "None")
|
|
466
|
+
city = person_data.get("city")
|
|
467
|
+
state = person_data.get("state")
|
|
468
|
+
parts = []
|
|
469
|
+
for value in (city, state):
|
|
470
|
+
if value is None:
|
|
471
|
+
continue
|
|
472
|
+
s = str(value).strip()
|
|
473
|
+
if not s or s.lower() == "none":
|
|
474
|
+
continue
|
|
475
|
+
parts.append(s)
|
|
476
|
+
lead_location = ", ".join(parts) if parts else None
|
|
477
|
+
if is_empty(input_user_properties.get("lead_location")) and lead_location:
|
|
459
478
|
input_user_properties["lead_location"] = lead_location
|
|
460
479
|
|
|
461
480
|
# Filter out placeholder emails
|
|
@@ -467,13 +486,13 @@ def fill_in_properties_with_preference(input_user_properties: dict, person_data:
|
|
|
467
486
|
|
|
468
487
|
async def search_leads_with_apollo(
|
|
469
488
|
query: LeadsQueryFilters,
|
|
470
|
-
|
|
489
|
+
max_items_to_search: Optional[int] = 10,
|
|
471
490
|
example_url: Optional[str] = None,
|
|
472
491
|
tool_config: Optional[List[Dict[str, Any]]] = None,
|
|
473
|
-
) -> List[
|
|
492
|
+
) -> List[Dict]:
|
|
474
493
|
logger.info("Entering search_leads_with_apollo")
|
|
475
494
|
|
|
476
|
-
max_items =
|
|
495
|
+
max_items = max_items_to_search or 10
|
|
477
496
|
if max_items > 2500:
|
|
478
497
|
logger.warning("Requested max_items_to_search > 2000, overriding to 2000.")
|
|
479
498
|
max_items = 2500
|
|
@@ -519,7 +538,7 @@ async def search_leads_with_apollo(
|
|
|
519
538
|
# Important: handle personNotTitles as well
|
|
520
539
|
"personNotTitles": "person_not_titles",
|
|
521
540
|
|
|
522
|
-
"qOrganizationJobTitles": "
|
|
541
|
+
"qOrganizationJobTitles": "q_organization_job_titles",
|
|
523
542
|
"sortAscending": "sort_ascending",
|
|
524
543
|
"sortByField": "sort_by_field",
|
|
525
544
|
"contactEmailStatusV2": "contact_email_status",
|
|
@@ -592,6 +611,8 @@ async def search_leads_with_apollo(
|
|
|
592
611
|
"organization_ids",
|
|
593
612
|
"organization_num_employees_ranges",
|
|
594
613
|
"person_not_titles", # <--- added so single item is forced into list
|
|
614
|
+
"q_organization_job_titles",
|
|
615
|
+
"organization_latest_funding_stage_cd",
|
|
595
616
|
):
|
|
596
617
|
if isinstance(final_value, str):
|
|
597
618
|
final_value = [final_value]
|
|
@@ -612,7 +633,8 @@ async def search_leads_with_apollo(
|
|
|
612
633
|
dynamic_payload = {
|
|
613
634
|
"person_titles": query.person_current_titles or [],
|
|
614
635
|
"person_locations": query.person_locations or [],
|
|
615
|
-
"search_signal_ids": query.
|
|
636
|
+
"search_signal_ids": query.filter_by_signals or [],
|
|
637
|
+
"q_keywords": query.search_keywords or "",
|
|
616
638
|
"organization_num_employees_ranges": (
|
|
617
639
|
query.organization_num_employees_ranges
|
|
618
640
|
or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
|
|
@@ -620,6 +642,10 @@ async def search_leads_with_apollo(
|
|
|
620
642
|
"page": 1,
|
|
621
643
|
"per_page": min(max_items, 100),
|
|
622
644
|
}
|
|
645
|
+
if query.job_openings_with_titles:
|
|
646
|
+
dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
|
|
647
|
+
if query.latest_funding_stages:
|
|
648
|
+
dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
|
|
623
649
|
if query.sort_by_field is not None:
|
|
624
650
|
dynamic_payload["sort_by_field"] = query.sort_by_field
|
|
625
651
|
if query.sort_ascending is not None:
|
|
@@ -657,9 +683,9 @@ async def search_leads_with_apollo(
|
|
|
657
683
|
logger.info(f"Fetched a total of {len(all_people)} items from Apollo (across pages).")
|
|
658
684
|
|
|
659
685
|
# -----------------------------------------------
|
|
660
|
-
# Convert raw results ->
|
|
686
|
+
# Convert raw results -> dictionary objects
|
|
661
687
|
# -----------------------------------------------
|
|
662
|
-
leads: List[
|
|
688
|
+
leads: List[Dict[str, Any]] = []
|
|
663
689
|
for user_data_from_apollo in all_people:
|
|
664
690
|
person_data = user_data_from_apollo
|
|
665
691
|
|
|
@@ -673,15 +699,217 @@ async def search_leads_with_apollo(
|
|
|
673
699
|
additional_props["apollo_person_data"] = json.dumps(person_data)
|
|
674
700
|
input_user_properties["additional_properties"] = additional_props
|
|
675
701
|
|
|
676
|
-
|
|
677
|
-
lead.agent_instance_id = request.agent_instance_id
|
|
678
|
-
lead.smart_list_id = request.id
|
|
679
|
-
lead.organization_id = request.organization_id
|
|
680
|
-
leads.append(lead)
|
|
702
|
+
leads.append(input_user_properties)
|
|
681
703
|
|
|
682
|
-
logger.info(f"Converted {len(leads)} Apollo records into
|
|
704
|
+
logger.info(f"Converted {len(leads)} Apollo records into dictionaries.")
|
|
683
705
|
return leads
|
|
684
706
|
|
|
707
|
+
|
|
708
|
+
async def search_leads_with_apollo_page(
|
|
709
|
+
query: LeadsQueryFilters,
|
|
710
|
+
page: Optional[int] = 1,
|
|
711
|
+
per_page: Optional[int] = 25,
|
|
712
|
+
example_url: Optional[str] = None,
|
|
713
|
+
tool_config: Optional[List[Dict[str, Any]]] = None,
|
|
714
|
+
) -> Dict[str, Any]:
|
|
715
|
+
"""Fetch a single page of Apollo leads using ``page`` and ``per_page``.
|
|
716
|
+
|
|
717
|
+
This helper performs one request to the Apollo API and returns the fetched
|
|
718
|
+
leads along with comprehensive pagination metadata.
|
|
719
|
+
|
|
720
|
+
Args:
|
|
721
|
+
query: LeadsQueryFilters object containing search criteria
|
|
722
|
+
page: Page number to fetch (1-indexed, defaults to 1)
|
|
723
|
+
per_page: Number of results per page (defaults to 25)
|
|
724
|
+
example_url: Optional URL to parse search parameters from
|
|
725
|
+
tool_config: Optional tool configuration for API keys
|
|
726
|
+
|
|
727
|
+
Returns:
|
|
728
|
+
Dict containing:
|
|
729
|
+
- current_page: The current page number
|
|
730
|
+
- per_page: Number of results per page
|
|
731
|
+
- total_entries: Total number of results available
|
|
732
|
+
- total_pages: Total number of pages available
|
|
733
|
+
- has_next_page: Boolean indicating if more pages exist
|
|
734
|
+
- next_page: Next page number (None if no more pages)
|
|
735
|
+
- results: List of lead dictionaries for this page
|
|
736
|
+
"""
|
|
737
|
+
logger.info("Entering search_leads_with_apollo_page")
|
|
738
|
+
|
|
739
|
+
if example_url:
|
|
740
|
+
parsed_url = urlparse(example_url)
|
|
741
|
+
query_string = parsed_url.query
|
|
742
|
+
|
|
743
|
+
if not query_string and "?" in parsed_url.fragment:
|
|
744
|
+
fragment_query = parsed_url.fragment.split("?", 1)[1]
|
|
745
|
+
query_string = fragment_query
|
|
746
|
+
|
|
747
|
+
query_params = parse_qs(query_string)
|
|
748
|
+
|
|
749
|
+
dynamic_payload: Dict[str, Any] = {
|
|
750
|
+
"page": page,
|
|
751
|
+
"per_page": per_page,
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
mapping = {
|
|
755
|
+
"personLocations": "person_locations",
|
|
756
|
+
"organizationNumEmployeesRanges": "organization_num_employees_ranges",
|
|
757
|
+
"personTitles": "person_titles",
|
|
758
|
+
"personNotTitles": "person_not_titles",
|
|
759
|
+
"qOrganizationJobTitles": "q_organization_job_titles",
|
|
760
|
+
"sortAscending": "sort_ascending",
|
|
761
|
+
"sortByField": "sort_by_field",
|
|
762
|
+
"contactEmailStatusV2": "contact_email_status",
|
|
763
|
+
"searchSignalIds": "search_signal_ids",
|
|
764
|
+
"organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
|
|
765
|
+
"revenueRange[max]": "revenue_range_max",
|
|
766
|
+
"revenueRange[min]": "revenue_range_min",
|
|
767
|
+
"currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
|
|
768
|
+
"organizationIndustryTagIds": "organization_industry_tag_ids",
|
|
769
|
+
"notOrganizationIds": "not_organization_ids",
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
for raw_key, raw_value_list in query_params.items():
|
|
773
|
+
if raw_key.endswith("[]"):
|
|
774
|
+
key = raw_key[:-2]
|
|
775
|
+
else:
|
|
776
|
+
key = raw_key
|
|
777
|
+
|
|
778
|
+
if raw_key in mapping:
|
|
779
|
+
key = mapping[raw_key]
|
|
780
|
+
elif key in mapping:
|
|
781
|
+
key = mapping[key]
|
|
782
|
+
else:
|
|
783
|
+
key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()
|
|
784
|
+
|
|
785
|
+
if len(raw_value_list) == 1:
|
|
786
|
+
final_value: Union[str, List[str]] = raw_value_list[0]
|
|
787
|
+
else:
|
|
788
|
+
final_value = raw_value_list
|
|
789
|
+
|
|
790
|
+
if key in ("sort_ascending",):
|
|
791
|
+
val_lower = str(final_value).lower()
|
|
792
|
+
final_value = val_lower in ("true", "1", "yes")
|
|
793
|
+
|
|
794
|
+
if key in ("page", "per_page"):
|
|
795
|
+
try:
|
|
796
|
+
final_value = int(final_value)
|
|
797
|
+
except ValueError:
|
|
798
|
+
pass
|
|
799
|
+
|
|
800
|
+
if key == "q_keywords" and isinstance(final_value, list):
|
|
801
|
+
final_value = " ".join(final_value)
|
|
802
|
+
|
|
803
|
+
if raw_key.endswith("[]"):
|
|
804
|
+
if isinstance(final_value, str):
|
|
805
|
+
final_value = [final_value]
|
|
806
|
+
else:
|
|
807
|
+
if key in (
|
|
808
|
+
"person_locations",
|
|
809
|
+
"person_titles",
|
|
810
|
+
"person_seniorities",
|
|
811
|
+
"organization_locations",
|
|
812
|
+
"q_organization_domains",
|
|
813
|
+
"contact_email_status",
|
|
814
|
+
"organization_ids",
|
|
815
|
+
"organization_num_employees_ranges",
|
|
816
|
+
"person_not_titles",
|
|
817
|
+
"q_organization_job_titles",
|
|
818
|
+
"organization_latest_funding_stage_cd",
|
|
819
|
+
):
|
|
820
|
+
if isinstance(final_value, str):
|
|
821
|
+
final_value = [final_value]
|
|
822
|
+
|
|
823
|
+
dynamic_payload[key] = final_value
|
|
824
|
+
|
|
825
|
+
if dynamic_payload.get("sort_by_field") == "[none]":
|
|
826
|
+
dynamic_payload.pop("sort_by_field")
|
|
827
|
+
|
|
828
|
+
# -----------------------------------
|
|
829
|
+
# B) No example_url -> build from `query`
|
|
830
|
+
# -----------------------------------
|
|
831
|
+
else:
|
|
832
|
+
dynamic_payload = {
|
|
833
|
+
"person_titles": query.person_current_titles or [],
|
|
834
|
+
"person_locations": query.person_locations or [],
|
|
835
|
+
"search_signal_ids": query.filter_by_signals or [],
|
|
836
|
+
"q_keywords": query.search_keywords or "",
|
|
837
|
+
"organization_num_employees_ranges": (
|
|
838
|
+
query.organization_num_employees_ranges
|
|
839
|
+
or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
|
|
840
|
+
),
|
|
841
|
+
}
|
|
842
|
+
if query.job_openings_with_titles:
|
|
843
|
+
dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
|
|
844
|
+
if query.latest_funding_stages:
|
|
845
|
+
dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
|
|
846
|
+
if query.sort_by_field is not None:
|
|
847
|
+
dynamic_payload["sort_by_field"] = query.sort_by_field
|
|
848
|
+
if query.sort_ascending is not None:
|
|
849
|
+
dynamic_payload["sort_ascending"] = query.sort_ascending
|
|
850
|
+
|
|
851
|
+
page_payload = dict(dynamic_payload)
|
|
852
|
+
page_payload["page"] = page
|
|
853
|
+
page_payload["per_page"] = per_page
|
|
854
|
+
|
|
855
|
+
print(f"Fetching Apollo page {page} with per_page {per_page}..."
|
|
856
|
+
f" Payload: {json.dumps(page_payload, indent=2)}")
|
|
857
|
+
|
|
858
|
+
# Get the full Apollo API response with pagination metadata
|
|
859
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
860
|
+
headers = {
|
|
861
|
+
"Cache-Control": "no-cache",
|
|
862
|
+
"Content-Type": "application/json",
|
|
863
|
+
}
|
|
864
|
+
if is_oauth:
|
|
865
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
866
|
+
else:
|
|
867
|
+
headers["X-Api-Key"] = token
|
|
868
|
+
|
|
869
|
+
url = "https://api.apollo.io/api/v1/mixed_people/search"
|
|
870
|
+
|
|
871
|
+
async with aiohttp.ClientSession() as session:
|
|
872
|
+
apollo_response = await fetch_apollo_data(session, url, headers, page_payload)
|
|
873
|
+
if not apollo_response:
|
|
874
|
+
return {"current_page": page, "per_page": per_page, "total_entries": 0, "total_pages": 0, "has_next_page": False, "results": []}
|
|
875
|
+
|
|
876
|
+
# Extract pagination metadata
|
|
877
|
+
pagination = apollo_response.get("pagination", {})
|
|
878
|
+
current_page = pagination.get("page", page)
|
|
879
|
+
total_entries = pagination.get("total_entries", 0)
|
|
880
|
+
total_pages = pagination.get("total_pages", 0)
|
|
881
|
+
per_page_actual = pagination.get("per_page", per_page)
|
|
882
|
+
|
|
883
|
+
# Determine if there are more pages
|
|
884
|
+
has_next_page = current_page < total_pages
|
|
885
|
+
|
|
886
|
+
# Extract people and contacts
|
|
887
|
+
people = apollo_response.get("people", [])
|
|
888
|
+
contacts = apollo_response.get("contacts", [])
|
|
889
|
+
page_results = people + contacts
|
|
890
|
+
|
|
891
|
+
leads: List[Dict[str, Any]] = []
|
|
892
|
+
for person_data in page_results:
|
|
893
|
+
input_user_properties: Dict[str, Any] = {}
|
|
894
|
+
additional_props = input_user_properties.get("additional_properties") or {}
|
|
895
|
+
input_user_properties = fill_in_properties_with_preference(input_user_properties, person_data)
|
|
896
|
+
person_data = cleanup_properties(person_data)
|
|
897
|
+
additional_props["apollo_person_data"] = json.dumps(person_data)
|
|
898
|
+
input_user_properties["additional_properties"] = additional_props
|
|
899
|
+
leads.append(input_user_properties)
|
|
900
|
+
|
|
901
|
+
logger.info(f"Converted {len(leads)} Apollo records into dictionaries (single page mode). Page {current_page} of {total_pages}")
|
|
902
|
+
|
|
903
|
+
return {
|
|
904
|
+
"current_page": current_page,
|
|
905
|
+
"per_page": per_page_actual,
|
|
906
|
+
"total_entries": total_entries,
|
|
907
|
+
"total_pages": total_pages,
|
|
908
|
+
"has_next_page": has_next_page,
|
|
909
|
+
"next_page": current_page + 1 if has_next_page else None,
|
|
910
|
+
"results": leads
|
|
911
|
+
}
|
|
912
|
+
|
|
685
913
|
@assistant_tool
|
|
686
914
|
async def get_organization_domain_from_apollo(
|
|
687
915
|
organization_id: str,
|
|
@@ -741,22 +969,20 @@ async def get_organization_details_from_apollo(
|
|
|
741
969
|
"""
|
|
742
970
|
logger.info("Entering get_organization_details_from_apollo")
|
|
743
971
|
|
|
744
|
-
|
|
972
|
+
token, is_oauth = get_apollo_access_token(tool_config)
|
|
745
973
|
if not organization_id:
|
|
746
974
|
logger.warning("No organization_id provided.")
|
|
747
975
|
return {'error': "Organization ID must be provided"}
|
|
748
976
|
|
|
749
977
|
headers = {
|
|
750
|
-
"X-Api-Key": APOLLO_API_KEY,
|
|
751
978
|
"Content-Type": "application/json",
|
|
752
979
|
"Cache-Control": "no-cache",
|
|
753
980
|
"Accept": "application/json"
|
|
754
981
|
}
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
return cached_response
|
|
982
|
+
if is_oauth:
|
|
983
|
+
headers["Authorization"] = f"Bearer {token}"
|
|
984
|
+
else:
|
|
985
|
+
headers["X-Api-Key"] = token
|
|
760
986
|
|
|
761
987
|
url = f'https://api.apollo.io/api/v1/organizations/{organization_id}'
|
|
762
988
|
logger.debug(f"Making GET request to Apollo for organization ID: {organization_id}")
|
|
@@ -769,7 +995,6 @@ async def get_organization_details_from_apollo(
|
|
|
769
995
|
result = await response.json()
|
|
770
996
|
org_details = result.get('organization', {})
|
|
771
997
|
if org_details:
|
|
772
|
-
cache_output("get_organization_details_from_apollo", organization_id, org_details)
|
|
773
998
|
logger.info("Successfully retrieved organization details from Apollo.")
|
|
774
999
|
return org_details
|
|
775
1000
|
else:
|
|
@@ -836,7 +1061,7 @@ async def enrich_user_info_with_apollo(
|
|
|
836
1061
|
email=email,
|
|
837
1062
|
tool_config=tool_config
|
|
838
1063
|
)
|
|
839
|
-
except Exception
|
|
1064
|
+
except Exception:
|
|
840
1065
|
logger.exception("Exception occurred while enriching person info from Apollo by LinkedIn or email.")
|
|
841
1066
|
else:
|
|
842
1067
|
# Fallback to name-based lookup
|
|
@@ -884,11 +1109,11 @@ async def enrich_user_info_with_apollo(
|
|
|
884
1109
|
linkedin_url=linkedin_url,
|
|
885
1110
|
tool_config=tool_config
|
|
886
1111
|
)
|
|
887
|
-
except Exception
|
|
1112
|
+
except Exception:
|
|
888
1113
|
logger.exception("Exception occurred during second stage Apollo enrichment.")
|
|
889
1114
|
if user_data_from_apollo:
|
|
890
1115
|
break
|
|
891
|
-
except Exception
|
|
1116
|
+
except Exception:
|
|
892
1117
|
logger.exception("Exception occurred while performing name-based lookup in Apollo.")
|
|
893
1118
|
|
|
894
1119
|
if not user_data_from_apollo:
|
|
@@ -938,11 +1163,20 @@ async def enrich_user_info_with_apollo(
|
|
|
938
1163
|
if not input_user_properties.get("summary_about_lead"):
|
|
939
1164
|
input_user_properties["summary_about_lead"] = person_data["headline"]
|
|
940
1165
|
|
|
941
|
-
# Derive location
|
|
942
|
-
city = person_data.get("city"
|
|
943
|
-
state = person_data.get("state"
|
|
944
|
-
|
|
945
|
-
|
|
1166
|
+
# Derive location (avoid literal "None")
|
|
1167
|
+
city = person_data.get("city")
|
|
1168
|
+
state = person_data.get("state")
|
|
1169
|
+
parts = []
|
|
1170
|
+
for value in (city, state):
|
|
1171
|
+
if value is None:
|
|
1172
|
+
continue
|
|
1173
|
+
s = str(value).strip()
|
|
1174
|
+
if not s or s.lower() == "none":
|
|
1175
|
+
continue
|
|
1176
|
+
parts.append(s)
|
|
1177
|
+
lead_location = ", ".join(parts)
|
|
1178
|
+
if lead_location:
|
|
1179
|
+
input_user_properties["lead_location"] = lead_location
|
|
946
1180
|
|
|
947
1181
|
# Verify name match
|
|
948
1182
|
first_matched = bool(
|
|
@@ -962,3 +1196,389 @@ async def enrich_user_info_with_apollo(
|
|
|
962
1196
|
input_user_properties["additional_properties"] = additional_props
|
|
963
1197
|
|
|
964
1198
|
return input_user_properties
|
|
1199
|
+
|
|
1200
|
+
|
|
1201
|
+
async def search_companies_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Search for companies using Apollo's organizations/search endpoint.

    Performs a single POST with ``dynamic_payload`` as the request body and
    returns the raw organization records from the response. No pagination
    is performed here; the payload is sent exactly as given.

    Args:
        tool_config: Apollo API configuration, passed to
            ``get_apollo_access_token`` to resolve the credential.
        dynamic_payload: Search parameters for the API call. When empty or
            ``None`` no request is made.

    Returns:
        List of company/organization dictionaries: the response's
        ``organizations`` followed by its ``accounts``. An empty list is
        returned when no payload was given or Apollo returned no data.
    """
    logger.info("Entering search_companies_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    # The credential is either an OAuth token or an API key; each one uses a
    # different request header.
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"
    logger.info(f"Sending payload to Apollo organizations endpoint (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        data = await fetch_apollo_data(session, url, headers, dynamic_payload)

    if not data:
        logger.error("No data returned from Apollo organizations search.")
        return []

    # ``dict.get(key, [])`` only falls back when the key is absent; an
    # explicit JSON null would come back as None and break the concatenation.
    organizations = data.get("organizations") or []
    accounts = data.get("accounts") or []  # Apollo sometimes returns accounts as well
    return organizations + accounts
|
|
1243
|
+
|
|
1244
|
+
|
|
1245
|
+
def fill_in_company_properties(company_data: dict) -> dict:
    """
    Convert Apollo company/organization data into a standardized format.

    Scalar fields are copied with ``dict.get`` defaults: a key missing from
    ``company_data`` yields ``""`` (or ``0`` for the numeric fields), while a
    key that is present keeps whatever value Apollo sent. ``keywords`` and
    ``technology_names`` are flattened into comma-separated strings, and the
    cleaned raw record is kept as JSON under ``additional_properties``.

    Args:
        company_data: Raw company data from Apollo API

    Returns:
        Dictionary with standardized company properties. ``technology_stack``
        is only present when the record lists at least one technology.
    """
    company_properties = {}

    # Basic company information
    company_properties["organization_name"] = company_data.get("name", "")
    company_properties["primary_domain"] = company_data.get("primary_domain", "")
    company_properties["website_url"] = company_data.get("website_url", "")
    company_properties["organization_linkedin_url"] = company_data.get("linkedin_url", "")

    # Location information
    company_properties["organization_city"] = company_data.get("city", "")
    company_properties["organization_state"] = company_data.get("state", "")
    company_properties["organization_country"] = company_data.get("country", "")

    # Combined location string; empty or null parts are skipped so the result
    # never contains dangling separators.
    location_parts = (
        company_data.get("city"),
        company_data.get("state"),
        company_data.get("country"),
    )
    company_properties["organization_location"] = ", ".join(
        str(part) for part in location_parts if part
    )

    # Company size and financial info
    company_properties["employee_count"] = company_data.get("estimated_num_employees", 0)
    company_properties["annual_revenue"] = company_data.get("annual_revenue", 0)

    # Industry and business info
    company_properties["industry"] = company_data.get("industry", "")
    # ``get("keywords", [])`` would return None for an explicit null and make
    # ``join`` raise; ``or []`` covers both the missing and the null case.
    keywords = company_data.get("keywords") or []
    company_properties["keywords"] = ", ".join(str(keyword) for keyword in keywords)
    company_properties["description"] = company_data.get("description", "")

    # Funding and growth
    company_properties["founded_year"] = company_data.get("founded_year", "")
    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
    company_properties["total_funding"] = company_data.get("total_funding", 0)

    # Technology stack (omitted entirely when nothing is listed)
    tech_stack = company_data.get("technology_names") or []
    if tech_stack:
        company_properties["technology_stack"] = ", ".join(str(tech) for tech in tech_stack)

    # Apollo-specific IDs
    company_properties["apollo_organization_id"] = company_data.get("id", "")

    # Additional metadata
    company_properties["phone"] = company_data.get("phone", "")
    company_properties["facebook_url"] = company_data.get("facebook_url", "")
    company_properties["twitter_url"] = company_data.get("twitter_url", "")

    # Store raw data for reference
    company_properties["additional_properties"] = {
        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
    }

    return company_properties
|
|
1309
|
+
|
|
1310
|
+
|
|
1311
|
+
# URL query parameter name -> organizations/search payload key. Parameters
# not listed here are converted from camelCase to snake_case.
_APOLLO_COMPANY_URL_PARAM_MAP: Dict[str, str] = {
    "organizationLocations": "organization_locations",
    "organizationNumEmployeesRanges": "organization_num_employees_ranges",
    "organizationIndustries": "organization_industries",
    "organizationIndustryTagIds": "organization_industry_tag_ids",
    "qKeywords": "q_keywords",
    "qOrganizationDomains": "q_organization_domains",
    "sortAscending": "sort_ascending",
    "sortByField": "sort_by_field",
    "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
    "revenueRange[max]": "revenue_range_max",
    "revenueRange[min]": "revenue_range_min",
    "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
    "organizationIds": "organization_ids",
    "notOrganizationIds": "not_organization_ids",
    "qOrganizationSearchListId": "q_organization_search_list_id",
    "qNotOrganizationSearchListId": "q_not_organization_search_list_id",
}

# Payload keys that are always sent as lists, even for a single URL value.
_APOLLO_COMPANY_LIST_KEYS = frozenset({
    "organization_locations",
    "organization_industries",
    "organization_industry_tag_ids",
    "q_organization_domains",
    "q_organization_keyword_tags",
    "organization_ids",
    "not_organization_ids",
    "organization_num_employees_ranges",
    "currently_using_any_of_technology_uids",
    "organization_latest_funding_stage_cd",
})

# Payload keys converted to int when the URL value parses as one.
_APOLLO_COMPANY_INT_KEYS = frozenset({
    "page",
    "per_page",
    "revenue_range_min",
    "revenue_range_max",
})


def _split_apollo_keyword_tags(value: Any) -> Any:
    """Split comma-separated keyword strings into a flat list of stripped tags."""
    if isinstance(value, str):
        return [tag.strip() for tag in value.split(",") if tag.strip()]
    if isinstance(value, list):
        flattened: List[Any] = []
        for item in value:
            if isinstance(item, str) and "," in item:
                flattened.extend(tag.strip() for tag in item.split(",") if tag.strip())
            else:
                flattened.append(item)
        return flattened
    return value


def _apollo_company_payload_from_url(example_url: str) -> Dict[str, Any]:
    """Translate the query parameters of ``example_url`` into an organizations/search payload."""
    parsed_url = urlparse(example_url)
    query_string = parsed_url.query
    # The parameters may live after a "?" inside the fragment rather than in
    # the regular query component.
    if not query_string and "?" in parsed_url.fragment:
        query_string = parsed_url.fragment.split("?", 1)[1]

    payload: Dict[str, Any] = {}
    for raw_key, raw_values in parse_qs(query_string).items():
        is_array_param = raw_key.endswith("[]")
        bare_key = raw_key[:-2] if is_array_param else raw_key

        if raw_key in _APOLLO_COMPANY_URL_PARAM_MAP:
            key = _APOLLO_COMPANY_URL_PARAM_MAP[raw_key]
        elif bare_key in _APOLLO_COMPANY_URL_PARAM_MAP:
            key = _APOLLO_COMPANY_URL_PARAM_MAP[bare_key]
        else:
            # camelCase -> snake_case for parameters without an explicit mapping.
            key = re.sub(r"(?<!^)(?=[A-Z])", "_", bare_key).lower()

        # parse_qs always yields lists; unwrap single values.
        value: Any = raw_values[0] if len(raw_values) == 1 else raw_values

        if key == "sort_ascending":
            value = str(value).lower() in ("true", "1", "yes")

        if key in _APOLLO_COMPANY_INT_KEYS:
            try:
                value = int(value)
            except (TypeError, ValueError):
                # A repeated parameter arrives as a list (TypeError) and a
                # non-numeric string raises ValueError; keep the raw value.
                pass

        if key == "q_organization_keyword_tags":
            value = _split_apollo_keyword_tags(value)

        # "name[]" parameters and known list-valued keys are sent as lists.
        if isinstance(value, str) and (is_array_param or key in _APOLLO_COMPANY_LIST_KEYS):
            value = [value]

        payload[key] = value

    # "[none]" is treated as "no sort field" (presumably a UI placeholder).
    if payload.get("sort_by_field") == "[none]":
        payload.pop("sort_by_field")

    return payload


def _apollo_company_payload_from_query(query: "CompanyQueryFilters") -> Dict[str, Any]:
    """Build an organizations/search payload from the populated fields of ``query``.

    Sort fields are deliberately not copied: the caller strips sorting
    parameters before sending the request.
    """
    payload: Dict[str, Any] = {}

    def copy_if_set(*field_names: str) -> None:
        # Only add fields if they have values (Apollo doesn't like empty arrays)
        for field_name in field_names:
            field_value = getattr(query, field_name)
            if field_value:
                payload[field_name] = field_value

    copy_if_set(
        "organization_locations",
        "organization_industries",
        "organization_industry_tag_ids",
    )

    # Explicit ranges win; otherwise derive one range from min/max, using
    # 1 and 1000 for whichever bound is missing.
    if query.organization_num_employees_ranges:
        payload["organization_num_employees_ranges"] = query.organization_num_employees_ranges
    elif query.min_employees or query.max_employees:
        payload["organization_num_employees_ranges"] = [
            f"{query.min_employees or 1},{query.max_employees or 1000}"
        ]

    if query.q_keywords:
        # Split comma-separated keywords into an array for company search
        if isinstance(query.q_keywords, str):
            keyword_tags = [tag.strip() for tag in query.q_keywords.split(",") if tag.strip()]
        else:
            keyword_tags = query.q_keywords
        payload["q_organization_keyword_tags"] = keyword_tags

    copy_if_set("q_organization_domains")

    # Revenue bounds use an explicit None check so that 0 is still sent.
    if query.revenue_range_min is not None:
        payload["revenue_range_min"] = query.revenue_range_min
    if query.revenue_range_max is not None:
        payload["revenue_range_max"] = query.revenue_range_max

    copy_if_set(
        "organization_latest_funding_stage_cd",
        "currently_using_any_of_technology_uids",
        "organization_ids",
        "not_organization_ids",
        "q_organization_search_list_id",
        "q_not_organization_search_list_id",
    )

    return payload


@assistant_tool
async def search_companies_with_apollo_page(
    query: CompanyQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """
    Fetch a single page of Apollo companies using ``page`` and ``per_page``.

    This helper performs one request to the Apollo API and returns the fetched
    companies along with comprehensive pagination metadata. When
    ``example_url`` is given, the search parameters are parsed from it and
    ``query`` is ignored; otherwise they are built from ``query``. In both
    cases the ``page`` and ``per_page`` arguments decide which page is
    requested, and sorting parameters are removed before the request is sent.

    Args:
        query: CompanyQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1)
        per_page: Number of results per page (defaults to 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
            - current_page: The current page number
            - per_page: Number of results per page
            - total_entries: Total number of results available
            - total_pages: Total number of pages available
            - has_next_page: Boolean indicating if more pages exist
            - next_page: Next page number (None if no more pages)
            - results: List of company dictionaries for this page
    """
    logger.info("Entering search_companies_with_apollo_page")

    # An explicit None falls back to the documented defaults instead of being
    # sent to Apollo as null.
    page = 1 if page is None else page
    per_page = 25 if per_page is None else per_page

    if example_url:
        dynamic_payload = _apollo_company_payload_from_url(example_url)
    else:
        dynamic_payload = _apollo_company_payload_from_query(query)

    # Remove sorting parameters that may not be supported by organizations endpoint
    dynamic_payload.pop("sort_by_field", None)
    dynamic_payload.pop("sort_ascending", None)

    # Clean up the payload - remove empty arrays and None values that Apollo doesn't like
    cleaned_payload: Dict[str, Any] = {
        key: value
        for key, value in dynamic_payload.items()
        if value is not None and not (isinstance(value, list) and not value)
    }

    # The function arguments always win over any page/per_page found in the URL.
    cleaned_payload["page"] = page
    cleaned_payload["per_page"] = per_page

    logger.info(
        "Fetching Apollo companies page %s with per_page %s... Payload: %s",
        page,
        per_page,
        json.dumps(cleaned_payload, indent=2),
    )

    # The credential is either an OAuth token or an API key; each one uses a
    # different request header.
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"

    # Get the full Apollo API response with pagination metadata
    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, cleaned_payload)

    if not apollo_response:
        # Same shape as the success result so callers can read every key.
        return {
            "current_page": page,
            "per_page": per_page,
            "total_entries": 0,
            "total_pages": 0,
            "has_next_page": False,
            "next_page": None,
            "results": [],
        }

    # Extract pagination metadata, tolerating missing or null fields
    pagination = apollo_response.get("pagination") or {}
    current_page = pagination.get("page") or page
    total_entries = pagination.get("total_entries") or 0
    total_pages = pagination.get("total_pages") or 0
    per_page_actual = pagination.get("per_page") or per_page

    # Determine if there are more pages
    has_next_page = current_page < total_pages

    # Extract organizations and accounts (either may be absent or null)
    organizations = apollo_response.get("organizations") or []
    accounts = apollo_response.get("accounts") or []
    companies: List[Dict[str, Any]] = [
        fill_in_company_properties(company_data)
        for company_data in organizations + accounts
    ]

    logger.info(f"Converted {len(companies)} Apollo company records into standardized dictionaries (single page mode). Page {current_page} of {total_pages}")

    return {
        "current_page": current_page,
        "per_page": per_page_actual,
        "total_entries": total_entries,
        "total_pages": total_pages,
        "has_next_page": has_next_page,
        "next_page": current_page + 1 if has_next_page else None,
        "results": companies,
    }
|