dhisana 0.0.1.dev243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/__init__.py +1 -0
- dhisana/cli/__init__.py +1 -0
- dhisana/cli/cli.py +20 -0
- dhisana/cli/datasets.py +27 -0
- dhisana/cli/models.py +26 -0
- dhisana/cli/predictions.py +20 -0
- dhisana/schemas/__init__.py +1 -0
- dhisana/schemas/common.py +399 -0
- dhisana/schemas/sales.py +965 -0
- dhisana/ui/__init__.py +1 -0
- dhisana/ui/components.py +472 -0
- dhisana/utils/__init__.py +1 -0
- dhisana/utils/add_mapping.py +352 -0
- dhisana/utils/agent_tools.py +51 -0
- dhisana/utils/apollo_tools.py +1597 -0
- dhisana/utils/assistant_tool_tag.py +4 -0
- dhisana/utils/built_with_api_tools.py +282 -0
- dhisana/utils/cache_output_tools.py +98 -0
- dhisana/utils/cache_output_tools_local.py +78 -0
- dhisana/utils/check_email_validity_tools.py +717 -0
- dhisana/utils/check_for_intent_signal.py +107 -0
- dhisana/utils/check_linkedin_url_validity.py +209 -0
- dhisana/utils/clay_tools.py +43 -0
- dhisana/utils/clean_properties.py +135 -0
- dhisana/utils/company_utils.py +60 -0
- dhisana/utils/compose_salesnav_query.py +259 -0
- dhisana/utils/compose_search_query.py +759 -0
- dhisana/utils/compose_three_step_workflow.py +234 -0
- dhisana/utils/composite_tools.py +137 -0
- dhisana/utils/dataframe_tools.py +237 -0
- dhisana/utils/domain_parser.py +45 -0
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_parse_helpers.py +132 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +933 -0
- dhisana/utils/extract_email_content_for_llm.py +101 -0
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +426 -0
- dhisana/utils/g2_tools.py +104 -0
- dhisana/utils/generate_content.py +41 -0
- dhisana/utils/generate_custom_message.py +271 -0
- dhisana/utils/generate_email.py +278 -0
- dhisana/utils/generate_email_response.py +465 -0
- dhisana/utils/generate_flow.py +102 -0
- dhisana/utils/generate_leads_salesnav.py +303 -0
- dhisana/utils/generate_linkedin_connect_message.py +224 -0
- dhisana/utils/generate_linkedin_response_message.py +317 -0
- dhisana/utils/generate_structured_output_internal.py +462 -0
- dhisana/utils/google_custom_search.py +267 -0
- dhisana/utils/google_oauth_tools.py +727 -0
- dhisana/utils/google_workspace_tools.py +1294 -0
- dhisana/utils/hubspot_clearbit.py +96 -0
- dhisana/utils/hubspot_crm_tools.py +2440 -0
- dhisana/utils/instantly_tools.py +149 -0
- dhisana/utils/linkedin_crawler.py +168 -0
- dhisana/utils/lusha_tools.py +333 -0
- dhisana/utils/mailgun_tools.py +156 -0
- dhisana/utils/mailreach_tools.py +123 -0
- dhisana/utils/microsoft365_tools.py +455 -0
- dhisana/utils/openai_assistant_and_file_utils.py +267 -0
- dhisana/utils/openai_helpers.py +977 -0
- dhisana/utils/openapi_spec_to_tools.py +45 -0
- dhisana/utils/openapi_tool/__init__.py +1 -0
- dhisana/utils/openapi_tool/api_models.py +633 -0
- dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
- dhisana/utils/openapi_tool/openapi_tool.py +319 -0
- dhisana/utils/parse_linkedin_messages_txt.py +100 -0
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +1226 -0
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/python_function_to_tools.py +83 -0
- dhisana/utils/research_lead.py +176 -0
- dhisana/utils/sales_navigator_crawler.py +1103 -0
- dhisana/utils/salesforce_crm_tools.py +477 -0
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +162 -0
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +852 -0
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +582 -0
- dhisana/utils/test_connect.py +2087 -0
- dhisana/utils/trasform_json.py +173 -0
- dhisana/utils/web_download_parse_tools.py +189 -0
- dhisana/utils/workflow_code_model.py +5 -0
- dhisana/utils/zoominfo_tools.py +357 -0
- dhisana/workflow/__init__.py +1 -0
- dhisana/workflow/agent.py +18 -0
- dhisana/workflow/flow.py +44 -0
- dhisana/workflow/task.py +43 -0
- dhisana/workflow/test.py +90 -0
- dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
- dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
- dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
- dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
- dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1597 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import aiohttp
|
|
7
|
+
import backoff
|
|
8
|
+
|
|
9
|
+
from dhisana.schemas.sales import LeadsQueryFilters, CompanyQueryFilters
|
|
10
|
+
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
11
|
+
from urllib.parse import urlparse, parse_qs
|
|
12
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
13
|
+
|
|
14
|
+
from dhisana.utils.clean_properties import cleanup_properties
|
|
15
|
+
|
|
16
|
+
logging.basicConfig(level=logging.INFO)
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_apollo_access_token(tool_config: Optional[List[Dict]] = None) -> Tuple[str, bool]:
    """
    Resolve Apollo credentials from tool configuration or the environment.

    Lookup order:
      1. ``oauth_tokens`` (possibly a JSON-encoded blob) in the "apollo" config entry.
      2. A bare ``access_token`` in the config entry (treated as OAuth).
      3. ``apiKey`` / ``api_key`` in the config entry (plain API key).
      4. ``APOLLO_ACCESS_TOKEN`` environment variable (OAuth).
      5. ``APOLLO_API_KEY`` environment variable (API key).

    Args:
        tool_config: Optional tool configuration payload provided to the tool.

    Returns:
        Tuple[str, bool]: The credential and a flag that is ``True`` when it is
        an OAuth bearer token and ``False`` when it is a plain API key.

    Raises:
        ValueError: If no Apollo credential can be resolved from any source.
    """
    credential: Optional[str] = None
    bearer = False

    if tool_config:
        entry = next((cfg for cfg in tool_config if cfg.get("name") == "apollo"), None)
        if entry is None:
            logger.warning("No 'apollo' config item found in tool_config.")
        else:
            # Flatten the configuration list into a simple name -> value map,
            # skipping falsy entries.
            settings: Dict[str, Any] = {}
            for field in entry.get("configuration", []):
                if field:
                    settings[field["name"]] = field.get("value")

            # Highest preference: an OAuth token blob (may arrive JSON-encoded).
            blob = settings.get("oauth_tokens")
            if isinstance(blob, str):
                try:
                    blob = json.loads(blob)
                except Exception:
                    blob = None
            if isinstance(blob, dict):
                candidate = blob.get("access_token") or blob.get("token")
                if candidate:
                    credential, bearer = candidate, True

            # Next: a bare access_token entry, also treated as an OAuth token.
            if not credential:
                direct = settings.get("access_token")
                if direct:
                    credential, bearer = direct, True

            # Finally: a plain API key under either accepted spelling.
            if not credential:
                key = settings.get("apiKey") or settings.get("api_key")
                if key:
                    credential, bearer = key, False

    # Environment fallbacks, OAuth token first.
    if not credential:
        env_token = os.getenv("APOLLO_ACCESS_TOKEN")
        if env_token:
            credential, bearer = env_token, True

    if not credential:
        env_key = os.getenv("APOLLO_API_KEY")
        if env_key:
            credential, bearer = env_key, False

    if not credential:
        logger.error("Apollo integration is not configured.")
        raise ValueError(
            "Apollo integration is not configured. Please configure the connection to Apollo in Integrations."
        )

    return credential, bearer
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def enrich_person_info_from_apollo(
    linkedin_url: Optional[str] = None,
    email: Optional[str] = None,
    phone: Optional[str] = None,
    fetch_valid_phone_number: Optional[bool] = False,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch a person's details from Apollo using LinkedIn URL, email, or phone number.

    Parameters:
    - **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
    - **email** (*str*, optional): Email address of the person.
    - **phone** (*str*, optional): Phone number of the person.
    - **fetch_valid_phone_number** (*bool*, optional): If True, include phone numbers in the API response. Defaults to False.
    - **tool_config** (*list*, optional): Tool configuration used to resolve Apollo credentials.

    Returns:
    - **dict**: JSON response containing person information, or ``{'error': ...}`` on failure.

    Raises:
    - aiohttp.ClientResponseError: On persistent HTTP 429 responses, once the
      backoff decorator's retries are exhausted.
    - ValueError: If the Apollo integration is not configured.
    """
    logger.info("Entering enrich_person_info_from_apollo")

    token, is_oauth = get_apollo_access_token(tool_config)

    if not linkedin_url and not email and not phone:
        logger.warning("No linkedin_url, email, or phone provided. At least one is required.")
        return {'error': "At least one of linkedin_url, email, or phone must be provided"}

    # OAuth tokens go in the Authorization header; plain API keys use X-Api-Key.
    headers = {"Content-Type": "application/json"}
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    data = {}
    if linkedin_url:
        logger.debug(f"LinkedIn URL provided: {linkedin_url}")
        data['linkedin_url'] = linkedin_url
    if email:
        logger.debug(f"Email provided: {email}")
        data['email'] = email
    if phone:
        logger.debug(f"Phone provided: {phone}")
        data['phone_numbers'] = [phone]  # Apollo expects a list for phone numbers

    # Add reveal_phone_number parameter if fetch_valid_phone_number is True
    if fetch_valid_phone_number:
        logger.debug("fetch_valid_phone_number flag is True, including phone numbers in API response")
        data['reveal_phone_number'] = True

    url = 'https://api.apollo.io/api/v1/people/match'

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(url, headers=headers, json=data) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    logger.info("Successfully retrieved person info from Apollo.")
                    return result
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    # Cool down before surfacing the error so the retry (after
                    # backoff's own delay) has a chance of succeeding.
                    await asyncio.sleep(30)
                    raise aiohttp.ClientResponseError(
                        request_info=response.request_info,
                        history=response.history,
                        status=response.status,
                        message=msg,
                        headers=response.headers
                    )
                else:
                    result = await response.json()
                    logger.warning(f"enrich_person_info_from_apollo error: {result}")
                    return {'error': result}
        except aiohttp.ClientResponseError:
            # Bug fix: the broad `except Exception` below used to swallow the
            # 429 error raised above, so the backoff decorator never retried.
            # Re-raise so rate-limit errors actually trigger a retry.
            raise
        except Exception as e:
            logger.exception("Exception occurred while fetching person info from Apollo.")
            return {'error': str(e)}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def lookup_person_in_apollo_by_name(
    full_name: str,
    company_name: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch a person's details from Apollo using their full name and optionally company name.

    Parameters:
    - **full_name** (*str*): Full name of the person.
    - **company_name** (*str*, optional): Name of the company where the person works.
    - **tool_config** (*list*, optional): Tool configuration for API keys.

    Returns:
    - **dict**: JSON response containing person information, or ``{'error': ...}`` on failure.

    Raises:
    - aiohttp.ClientResponseError: On persistent HTTP 429 responses, once the
      backoff decorator's retries are exhausted.
    - ValueError: If the Apollo integration is not configured.
    """
    logger.info("Entering lookup_person_in_apollo_by_name")

    if not full_name:
        logger.warning("No full_name provided.")
        return {'error': "Full name is required"}

    token, is_oauth = get_apollo_access_token(tool_config)
    # OAuth tokens go in the Authorization header; plain API keys use X-Api-Key.
    headers = {"Content-Type": "application/json"}
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    # Construct the query payload
    data = {
        "q_keywords": f"{full_name} {company_name}" if company_name else full_name,
        "page": 1,
        "per_page": 10
    }

    url = 'https://api.apollo.io/api/v1/mixed_people/search'
    logger.debug(f"Making request to Apollo with payload: {data}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(url, headers=headers, json=data) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    logger.info("Successfully looked up person by name on Apollo.")
                    return result
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    # Cool down before surfacing the error so the retry (after
                    # backoff's own delay) has a chance of succeeding.
                    await asyncio.sleep(30)
                    raise aiohttp.ClientResponseError(
                        request_info=response.request_info,
                        history=response.history,
                        status=response.status,
                        message=msg,
                        headers=response.headers
                    )
                else:
                    result = await response.json()
                    logger.warning(f"lookup_person_in_apollo_by_name error: {result}")
                    return {'error': result}
        except aiohttp.ClientResponseError:
            # Bug fix: re-raise rate-limit errors instead of letting the broad
            # handler below convert them to an error dict; otherwise the
            # backoff decorator can never retry on 429.
            raise
        except Exception as e:
            logger.exception("Exception occurred while looking up person by name.")
            return {'error': str(e)}
|
|
256
|
+
|
|
257
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=30,
)
async def enrich_organization_info_from_apollo(
    organization_domain: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch an organization's details from Apollo using the organization domain.

    Parameters:
    - **organization_domain** (*str*, optional): Domain of the organization.
    - **tool_config** (*list*, optional): Tool configuration for Apollo credentials.

    Returns:
    - **dict**: JSON response containing organization information, or ``{'error': ...}``.

    Raises:
    - aiohttp.ClientResponseError: On persistent HTTP 429 responses, once the
      backoff decorator's retries are exhausted.
    - ValueError: If the Apollo integration is not configured.
    """
    logger.info("Entering enrich_organization_info_from_apollo")

    token, is_oauth = get_apollo_access_token(tool_config)

    if not organization_domain:
        logger.warning("No organization domain provided.")
        return {'error': "organization domain must be provided"}

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "accept": "application/json"
    }
    # OAuth tokens go in the Authorization header; plain API keys use X-Api-Key.
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = f'https://api.apollo.io/api/v1/organizations/enrich?domain={organization_domain}'
    logger.debug(f"Making GET request to Apollo for organization domain: {organization_domain}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, headers=headers) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    logger.info("Successfully retrieved organization info from Apollo.")
                    return result
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    raise aiohttp.ClientResponseError(
                        request_info=response.request_info,
                        history=response.history,
                        status=response.status,
                        message=msg,
                        headers=response.headers
                    )
                else:
                    result = await response.json()
                    logger.warning(f"Error from Apollo while enriching org info: {result}")
                    return {'error': result}
        except aiohttp.ClientResponseError:
            # Bug fix: propagate 429 errors so the backoff decorator retries;
            # the broad handler below used to swallow them, making the retry
            # configuration dead code.
            raise
        except Exception as e:
            logger.exception("Exception occurred while fetching organization info from Apollo.")
            return {'error': str(e)}
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=5,
    giveup=lambda e: e.status != 429,
    factor=2,
)
async def fetch_apollo_data(session, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    POST ``payload`` to an Apollo endpoint on ``session`` and return the decoded JSON body.

    HTTP 429 raises ``aiohttp.ClientResponseError`` so the backoff decorator can
    retry; any other non-200 status raises via ``raise_for_status``.
    """
    logger.info("Entering fetch_apollo_data")
    logger.debug("Making POST request to Apollo.")
    async with session.post(url, headers=headers, json=payload) as response:
        status = response.status
        logger.debug(f"Received response status: {status}")
        if status == 200:
            body = await response.json()
            logger.info("Successfully fetched data from Apollo.")
            return body
        if status == 429:
            msg = "Rate limit exceeded"
            logger.warning(msg)
            raise aiohttp.ClientResponseError(
                request_info=response.request_info,
                history=response.history,
                status=status,
                message=msg,
                headers=response.headers,
            )
        logger.error(f"Unexpected status code {status} from Apollo. Raising exception.")
        response.raise_for_status()
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
async def search_people_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Run a single Apollo mixed-people search request.

    Returns the combined ``people`` and ``contacts`` entries from the response,
    or an empty list when no payload is supplied or Apollo returns nothing.
    """
    logger.info("Entering search_people_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    token, is_oauth = get_apollo_access_token(tool_config)
    request_headers: Dict[str, str] = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    # OAuth tokens go in the Authorization header; plain API keys use X-Api-Key.
    if is_oauth:
        request_headers["Authorization"] = f"Bearer {token}"
    else:
        request_headers["X-Api-Key"] = token

    endpoint = "https://api.apollo.io/api/v1/mixed_people/search"
    logger.info(f"Sending payload to Apollo (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as session:
        data = await fetch_apollo_data(session, endpoint, request_headers, dynamic_payload)
        if not data:
            logger.error("No data returned from Apollo.")
            return []
        return data.get("people", []) + data.get("contacts", [])
|
|
390
|
+
|
|
391
|
+
def fill_in_properties_with_preference(input_user_properties: dict, person_data: dict) -> dict:
    """
    Merge an Apollo person record into a lead-properties dict.

    For each property:
    - If input_user_properties already has a non-empty value, keep it.
    - Otherwise, take the value from person_data if available.

    Args:
        input_user_properties: Lead properties gathered so far; mutated in place.
        person_data: Raw person payload returned by the Apollo API.

    Returns:
        The updated ``input_user_properties`` dict (same object, mutated).
    """

    def is_empty(value):
        """Returns True if the value is None, empty string, or only whitespace."""
        return value is None or (isinstance(value, str) and not value.strip())

    # Full name: existing input value wins; person_data only fills the gap.
    if is_empty(input_user_properties.get("full_name")) and person_data.get("name"):
        input_user_properties["full_name"] = person_data["name"]

    # First name
    if is_empty(input_user_properties.get("first_name")) and person_data.get("first_name"):
        input_user_properties["first_name"] = person_data["first_name"]

    # Last name
    if is_empty(input_user_properties.get("last_name")) and person_data.get("last_name"):
        input_user_properties["last_name"] = person_data["last_name"]

    # Email (defaults to "" so downstream code always sees a string)
    if is_empty(input_user_properties.get("email")):
        input_user_properties["email"] = person_data.get("email", "")

    # Phone: "contact" may be missing or None, so guard the chained lookup.
    if is_empty(input_user_properties.get("phone")):
        input_user_properties["phone"] = ((person_data.get("contact", {}) or {})
                                          .get("sanitized_phone", ""))

    # LinkedIn URL
    if is_empty(input_user_properties.get("user_linkedin_url")) and person_data.get("linkedin_url"):
        input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]

    # Organization data
    org_data = person_data.get("organization") or {}
    if org_data:
        # Primary domain
        if is_empty(input_user_properties.get("primary_domain_of_organization")) and org_data.get("primary_domain"):
            input_user_properties["primary_domain_of_organization"] = org_data["primary_domain"]

        # Organization name
        if is_empty(input_user_properties.get("organization_name")) and org_data.get("name"):
            input_user_properties["organization_name"] = org_data["name"]

        # Organization LinkedIn URL
        if is_empty(input_user_properties.get("organization_linkedin_url")) and org_data.get("linkedin_url"):
            input_user_properties["organization_linkedin_url"] = org_data["linkedin_url"]

        # Organization website
        if is_empty(input_user_properties.get("organization_website")) and org_data.get("website_url"):
            input_user_properties["organization_website"] = org_data["website_url"]

        # Keywords (joined into a single comma-separated string)
        if is_empty(input_user_properties.get("keywords")) and org_data.get("keywords"):
            input_user_properties["keywords"] = ", ".join(org_data["keywords"])

    # Title / Job Title
    if is_empty(input_user_properties.get("job_title")) and person_data.get("title"):
        input_user_properties["job_title"] = person_data["title"]

    # Headline
    if is_empty(input_user_properties.get("headline")) and person_data.get("headline"):
        input_user_properties["headline"] = person_data["headline"]

    # Summary about lead (fallback to headline if summary is missing)
    if is_empty(input_user_properties.get("summary_about_lead")) and person_data.get("headline"):
        input_user_properties["summary_about_lead"] = person_data["headline"]

    # City/State -> lead_location, skipping missing values and literal "None"
    parts = []
    for value in (person_data.get("city"), person_data.get("state")):
        if value is None:
            continue
        text = str(value).strip()
        if not text or text.lower() == "none":
            continue
        parts.append(text)
    lead_location = ", ".join(parts) if parts else None
    if is_empty(input_user_properties.get("lead_location")) and lead_location:
        input_user_properties["lead_location"] = lead_location

    # Filter out Apollo placeholder emails such as "email_not_unlocked@domain.com".
    # Bug fix: compare the domain part exactly instead of a substring check,
    # which previously also wiped legitimate addresses like "bob@mydomain.com".
    email = input_user_properties.get("email")
    if email and email.lower().rsplit("@", 1)[-1] == "domain.com":
        input_user_properties["email"] = ""

    return input_user_properties
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
async def search_leads_with_apollo(
    query: LeadsQueryFilters,
    max_items_to_search: Optional[int] = 10,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> List[Dict]:
    """
    Search Apollo for leads and return them as lead-property dictionaries.

    The search payload is built either by replaying an Apollo UI ``example_url``
    (its query string, or the query embedded in the URL fragment) or from the
    structured ``query`` filters. Results are fetched page by page until
    ``max_items_to_search`` items are collected or Apollo runs out of results.

    Args:
        query: Structured lead search filters (used when ``example_url`` is not given).
        max_items_to_search: Maximum number of leads to fetch (capped at 2500).
        example_url: Optional Apollo search URL whose parameters are replayed.
        tool_config: Optional tool configuration for Apollo credentials.

    Returns:
        A list of lead dictionaries; each carries the raw Apollo record as JSON
        under ``additional_properties["apollo_person_data"]``.
    """
    logger.info("Entering search_leads_with_apollo")

    max_items = max_items_to_search or 10
    if max_items > 2500:
        # Bug fix: the message previously said 2000 while the code capped at 2500.
        logger.warning("Requested max_items_to_search > 2500, overriding to 2500.")
        max_items = 2500

    # -----------------------------
    # A) example_url -> parse query
    # -----------------------------
    if example_url:
        logger.debug(f"example_url provided: {example_url}")

        parsed_url = urlparse(example_url)
        query_string = parsed_url.query

        # Apollo UI URLs often keep the query inside the fragment (#/people?...).
        if not query_string and "?" in parsed_url.fragment:
            fragment_query = parsed_url.fragment.split("?", 1)[1]
            query_string = fragment_query

        query_params = parse_qs(query_string)

        page_list = query_params.get("page", ["1"])
        per_page_list = query_params.get("per_page", ["100"])

        try:
            page_val = int(page_list[-1])
        except ValueError:
            page_val = 1

        try:
            per_page_val = int(per_page_list[-1])
        except ValueError:
            per_page_val = min(max_items, 100)

        dynamic_payload: Dict[str, Any] = {
            "page": page_val,
            "per_page": per_page_val,
        }

        # Map known UI parameter names to API field names.
        # You can augment this mapping if you have more custom fields.
        mapping = {
            "personLocations": "person_locations",
            "organizationNumEmployeesRanges": "organization_num_employees_ranges",
            "personTitles": "person_titles",
            # Important: handle personNotTitles as well
            "personNotTitles": "person_not_titles",

            "qOrganizationJobTitles": "q_organization_job_titles",
            "sortAscending": "sort_ascending",
            "sortByField": "sort_by_field",
            "contactEmailStatusV2": "contact_email_status",
            "searchSignalIds": "search_signal_ids",
            "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
            "revenueRange[max]": "revenue_range_max",
            "revenueRange[min]": "revenue_range_min",
            "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
            "organizationIndustryTagIds": "organization_industry_tag_ids",
            "notOrganizationIds": "not_organization_ids",
        }

        for raw_key, raw_value_list in query_params.items():
            # Strip off [] if present so we can do a snake_case transform
            if raw_key.endswith("[]"):
                key = raw_key[:-2]
            else:
                key = raw_key

            # Prefer an explicit mapping (raw key first, then the stripped key);
            # otherwise fall back to camelCase -> snake_case conversion.
            if raw_key in mapping:
                key = mapping[raw_key]
            elif key in mapping:
                key = mapping[key]
            else:
                key = re.sub(r'(?<!^)(?=[A-Z])', '_', key).lower()

            # A single-element list collapses to a scalar; longer lists stay lists.
            if len(raw_value_list) == 1:
                final_value: Union[str, List[str]] = raw_value_list[0]
            else:
                final_value = raw_value_list

            # Known booleans
            if key in ("sort_ascending",):
                val_lower = str(final_value).lower()
                final_value = val_lower in ("true", "1", "yes")

            # Parse numeric fields
            if key in ("page", "per_page"):
                try:
                    final_value = int(final_value)
                except ValueError:
                    pass

            # Join arrays for q_keywords
            if key == "q_keywords" and isinstance(final_value, list):
                final_value = " ".join(final_value)

            # ---------------------------------------------
            # Parameters that arrived as `name[]`, and known
            # array-typed parameters, must always be sent to
            # Apollo as lists even when only one value came in.
            # ---------------------------------------------
            if raw_key.endswith("[]"):
                if isinstance(final_value, str):
                    final_value = [final_value]
            else:
                if key in (
                    "person_locations",
                    "person_titles",
                    "person_seniorities",
                    "organization_locations",
                    "q_organization_domains",
                    "contact_email_status",
                    "organization_ids",
                    "organization_num_employees_ranges",
                    "person_not_titles",  # single item is forced into a list
                    "q_organization_job_titles",
                    "organization_latest_funding_stage_cd",
                ):
                    if isinstance(final_value, str):
                        final_value = [final_value]

            dynamic_payload[key] = final_value

        # Remove invalid sort
        if dynamic_payload.get("sort_by_field") == "[none]":
            dynamic_payload.pop("sort_by_field")

        if "per_page" not in query_params:
            dynamic_payload["per_page"] = min(max_items, 100)

    # -----------------------------------
    # B) No example_url -> build from `query`
    # -----------------------------------
    else:
        dynamic_payload = {
            "person_titles": query.person_current_titles or [],
            "person_locations": query.person_locations or [],
            "search_signal_ids": query.filter_by_signals or [],
            "q_keywords": query.search_keywords or "",
            "organization_num_employees_ranges": (
                query.organization_num_employees_ranges
                or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
            ),
            "page": 1,
            "per_page": min(max_items, 100),
        }
        if query.job_openings_with_titles:
            dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
        if query.latest_funding_stages:
            dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
        if query.sort_by_field is not None:
            dynamic_payload["sort_by_field"] = query.sort_by_field
        if query.sort_ascending is not None:
            dynamic_payload["sort_ascending"] = query.sort_ascending
        if query.person_seniorities:
            dynamic_payload["person_seniorities"] = query.person_seniorities

    # -----------------------------
    # C) Fetch multiple pages
    # -----------------------------
    all_people: List[Dict[str, Any]] = []
    total_fetched = 0

    current_page = int(dynamic_payload.get("page", 1))
    per_page = int(dynamic_payload.get("per_page", min(max_items, 100)))

    while total_fetched < max_items:
        page_payload = dict(dynamic_payload)
        page_payload["page"] = current_page
        page_payload["per_page"] = per_page

        logger.debug(f"Fetching page {current_page}, per_page {per_page}")
        page_results = await search_people_with_apollo(tool_config=tool_config, dynamic_payload=page_payload)

        if not page_results:
            break

        all_people.extend(page_results)
        page_count = len(page_results)
        total_fetched += page_count

        # A short page means Apollo has no more results.
        if page_count < per_page or total_fetched >= max_items:
            break

        current_page += 1

    logger.info(f"Fetched a total of {len(all_people)} items from Apollo (across pages).")

    # -----------------------------------------------
    # Convert raw results -> dictionary objects
    # -----------------------------------------------
    leads: List[Dict[str, Any]] = []
    for user_data_from_apollo in all_people:
        person_data = user_data_from_apollo

        input_user_properties: Dict[str, Any] = {}

        additional_props = input_user_properties.get("additional_properties") or {}
        input_user_properties = fill_in_properties_with_preference(input_user_properties, person_data)

        person_data = cleanup_properties(person_data)

        # Preserve the full raw Apollo record for downstream enrichment.
        additional_props["apollo_person_data"] = json.dumps(person_data)
        input_user_properties["additional_properties"] = additional_props

        leads.append(input_user_properties)

    logger.info(f"Converted {len(leads)} Apollo records into dictionaries.")
    return leads
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
async def search_leads_with_apollo_page(
    query: LeadsQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Fetch a single page of Apollo leads using ``page`` and ``per_page``.

    This helper performs one request to the Apollo API and returns the fetched
    leads along with comprehensive pagination metadata.

    Args:
        query: LeadsQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1)
        per_page: Number of results per page (defaults to 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
        - current_page: The current page number
        - per_page: Number of results per page
        - total_entries: Total number of results available
        - total_pages: Total number of pages available
        - has_next_page: Boolean indicating if more pages exist
        - next_page: Next page number (None if no more pages)
        - results: List of lead dictionaries for this page
    """
    logger.info("Entering search_leads_with_apollo_page")

    # -----------------------------------
    # A) example_url -> parse search params out of an Apollo UI URL
    # -----------------------------------
    if example_url:
        parsed_url = urlparse(example_url)
        query_string = parsed_url.query

        # Apollo UI URLs often carry the query string inside the fragment
        # (e.g. ".../#/people?personTitles[]=CEO"); fall back to it.
        if not query_string and "?" in parsed_url.fragment:
            fragment_query = parsed_url.fragment.split("?", 1)[1]
            query_string = fragment_query

        query_params = parse_qs(query_string)

        dynamic_payload: Dict[str, Any] = {
            "page": page,
            "per_page": per_page,
        }

        # Explicit camelCase -> snake_case overrides for keys whose automatic
        # conversion would not match Apollo's API parameter names.
        mapping = {
            "personLocations": "person_locations",
            "organizationNumEmployeesRanges": "organization_num_employees_ranges",
            "personTitles": "person_titles",
            "personNotTitles": "person_not_titles",
            "qOrganizationJobTitles": "q_organization_job_titles",
            "sortAscending": "sort_ascending",
            "sortByField": "sort_by_field",
            "contactEmailStatusV2": "contact_email_status",
            "searchSignalIds": "search_signal_ids",
            "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
            "revenueRange[max]": "revenue_range_max",
            "revenueRange[min]": "revenue_range_min",
            "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
            "organizationIndustryTagIds": "organization_industry_tag_ids",
            "notOrganizationIds": "not_organization_ids",
        }

        for raw_key, raw_value_list in query_params.items():
            # Strip the "[]" suffix Apollo uses for array parameters.
            if raw_key.endswith("[]"):
                key = raw_key[:-2]
            else:
                key = raw_key

            if raw_key in mapping:
                key = mapping[raw_key]
            elif key in mapping:
                key = mapping[key]
            else:
                # Generic camelCase -> snake_case conversion.
                key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()

            if len(raw_value_list) == 1:
                final_value: Union[str, List[str]] = raw_value_list[0]
            else:
                final_value = raw_value_list

            # Coerce boolean-ish strings for sort direction.
            if key in ("sort_ascending",):
                val_lower = str(final_value).lower()
                final_value = val_lower in ("true", "1", "yes")

            if key in ("page", "per_page"):
                try:
                    final_value = int(final_value)
                except ValueError:
                    pass

            if key == "q_keywords" and isinstance(final_value, list):
                final_value = " ".join(final_value)

            # Keys Apollo expects as lists must be wrapped even when the URL
            # carried only a single value.
            if raw_key.endswith("[]"):
                if isinstance(final_value, str):
                    final_value = [final_value]
            else:
                if key in (
                    "person_locations",
                    "person_titles",
                    "person_seniorities",
                    "organization_locations",
                    "q_organization_domains",
                    "contact_email_status",
                    "organization_ids",
                    "organization_num_employees_ranges",
                    "person_not_titles",
                    "q_organization_job_titles",
                    "organization_latest_funding_stage_cd",
                ):
                    if isinstance(final_value, str):
                        final_value = [final_value]

            dynamic_payload[key] = final_value

        # "[none]" is the UI placeholder for "no sorting" -- drop it.
        if dynamic_payload.get("sort_by_field") == "[none]":
            dynamic_payload.pop("sort_by_field")

    # -----------------------------------
    # B) No example_url -> build from `query`
    # -----------------------------------
    else:
        dynamic_payload = {
            "person_titles": query.person_current_titles or [],
            "person_locations": query.person_locations or [],
            "search_signal_ids": query.filter_by_signals or [],
            "q_keywords": query.search_keywords or "",
            "organization_num_employees_ranges": (
                query.organization_num_employees_ranges
                or [f"{query.min_employees_in_organization or 1},{query.max_employees_in_organization or 1000}"]
            ),
        }
        if query.job_openings_with_titles:
            dynamic_payload["q_organization_job_titles"] = query.job_openings_with_titles
        if query.latest_funding_stages:
            dynamic_payload["organization_latest_funding_stage_cd"] = query.latest_funding_stages
        if query.sort_by_field is not None:
            dynamic_payload["sort_by_field"] = query.sort_by_field
        if query.sort_ascending is not None:
            dynamic_payload["sort_ascending"] = query.sort_ascending
        if query.q_organization_keyword_tags:
            dynamic_payload["q_organization_keyword_tags"] = query.q_organization_keyword_tags

        if query.q_not_organization_keyword_tags:
            dynamic_payload["q_not_organization_keyword_tags"] = query.q_not_organization_keyword_tags

    page_payload = dict(dynamic_payload)
    page_payload["page"] = page
    page_payload["per_page"] = per_page

    # Fix: use the module logger instead of print() so this obeys the
    # application's logging configuration.
    logger.info(f"Fetching Apollo page {page} with per_page {per_page}..."
                f" Payload: {json.dumps(page_payload, indent=2)}")

    # Get the full Apollo API response with pagination metadata
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/mixed_people/search"

    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, page_payload)
        if not apollo_response:
            # Fix: include "next_page" so the error shape matches the
            # documented contract of the success path (it was missing before).
            return {
                "current_page": page,
                "per_page": per_page,
                "total_entries": 0,
                "total_pages": 0,
                "has_next_page": False,
                "next_page": None,
                "results": [],
            }

    # Extract pagination metadata
    pagination = apollo_response.get("pagination", {})
    current_page = pagination.get("page", page)
    total_entries = pagination.get("total_entries", 0)
    total_pages = pagination.get("total_pages", 0)
    per_page_actual = pagination.get("per_page", per_page)

    # Determine if there are more pages
    has_next_page = current_page < total_pages

    # Apollo splits results into "people" and "contacts"; merge them.
    people = apollo_response.get("people", [])
    contacts = apollo_response.get("contacts", [])
    page_results = people + contacts

    leads: List[Dict[str, Any]] = []
    for person_data in page_results:
        input_user_properties: Dict[str, Any] = {}
        additional_props = input_user_properties.get("additional_properties") or {}
        input_user_properties = fill_in_properties_with_preference(input_user_properties, person_data)
        person_data = cleanup_properties(person_data)
        # Keep the raw (cleaned) Apollo record for downstream consumers.
        additional_props["apollo_person_data"] = json.dumps(person_data)
        input_user_properties["additional_properties"] = additional_props
        leads.append(input_user_properties)

    logger.info(f"Converted {len(leads)} Apollo records into dictionaries (single page mode). Page {current_page} of {total_pages}")

    return {
        "current_page": current_page,
        "per_page": per_page_actual,
        "total_entries": total_entries,
        "total_pages": total_pages,
        "has_next_page": has_next_page,
        "next_page": current_page + 1 if has_next_page else None,
        "results": leads
    }
|
|
919
|
+
|
|
920
|
+
@assistant_tool
async def get_organization_domain_from_apollo(
    organization_id: str,
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, Any]:
    """
    Fetch an organization's domain from Apollo using the organization ID.

    Parameters:
    - organization_id (str): ID of the organization.

    Returns:
    - dict: Contains the organization's ID and domain, or an error message.
    """
    logger.info("Entering get_organization_domain_from_apollo")

    # Guard clause: nothing to look up without an ID.
    if not organization_id:
        logger.warning("No organization_id provided.")
        return {'error': 'organization_id must be provided'}

    try:
        # Delegate the actual API call, then pluck out just the domain.
        result = await get_organization_details_from_apollo(organization_id, tool_config=tool_config)
        if 'error' in result:
            return result

        domain = result.get('primary_domain')
        if not domain:
            logger.warning("Domain not found in the organization details.")
            return {'error': 'Domain not found in the organization details'}

        logger.info("Successfully retrieved domain from Apollo organization details.")
        return {'organization_id': organization_id, 'domain': domain}
    except Exception as e:
        logger.exception("Exception occurred in get_organization_domain_from_apollo.")
        return {'error': str(e)}
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=60,
)
async def get_organization_details_from_apollo(
    organization_id: str,
    tool_config: Optional[List[Dict]] = None,
) -> Dict[str, Any]:
    """
    Fetch an organization's details from Apollo using the organization ID.

    Retries up to 3 times with exponential backoff when the API responds with
    HTTP 429 (the ``backoff`` decorator only retries ``ClientResponseError``
    with status 429); every other failure is returned as an error dict.

    Parameters:
    - organization_id (str): ID of the organization.
    - tool_config (List[Dict], optional): Apollo tool configuration for API keys.

    Returns:
    - dict: Organization details or an error message.

    Raises:
    - aiohttp.ClientResponseError: on HTTP 429, so the backoff decorator can retry.
    """
    logger.info("Entering get_organization_details_from_apollo")

    token, is_oauth = get_apollo_access_token(tool_config)
    if not organization_id:
        logger.warning("No organization_id provided.")
        return {'error': "Organization ID must be provided"}

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "Accept": "application/json"
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = f'https://api.apollo.io/api/v1/organizations/{organization_id}'
    logger.debug(f"Making GET request to Apollo for organization ID: {organization_id}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, headers=headers) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    org_details = result.get('organization', {})
                    if org_details:
                        logger.info("Successfully retrieved organization details from Apollo.")
                        return org_details
                    else:
                        logger.warning("Organization details not found in the response.")
                        return {'error': 'Organization details not found in the response'}
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    # Surface Apollo's rate-limit headers for diagnostics.
                    limit_minute = response.headers.get('x-rate-limit-minute')
                    limit_hourly = response.headers.get('x-rate-limit-hourly')
                    limit_daily = response.headers.get('x-rate-limit-daily')
                    logger.info(f"get_organization_details_from_apollo x-rate-limit-minute: {limit_minute}")
                    logger.info(f"get_organization_details_from_apollo x-rate-limit-hourly: {limit_hourly}")
                    logger.info(f"get_organization_details_from_apollo x-rate-limit-daily: {limit_daily}")
                    logger.warning(msg)
                    raise aiohttp.ClientResponseError(
                        request_info=response.request_info,
                        history=response.history,
                        status=response.status,
                        message=msg,
                        headers=response.headers
                    )
                else:
                    result = await response.json()
                    logger.warning(f"get_organization_details_from_apollo error: {result}")
                    return {'error': result}
        except aiohttp.ClientResponseError:
            # Bug fix: the generic handler below used to swallow the 429 error
            # raised above, so the backoff decorator never saw it and the
            # retry logic was dead. Re-raise so backoff can retry.
            raise
        except Exception as e:
            logger.exception("Exception occurred while fetching organization details from Apollo.")
            return {'error': str(e)}
|
|
1033
|
+
|
|
1034
|
+
|
|
1035
|
+
async def enrich_user_info_with_apollo(
    input_user_properties: Dict[str, Any],
    tool_config: Optional[List[Dict]] = None
) -> Dict[str, Any]:
    """
    Enriches the user info (input_user_properties) with data from Apollo.
    Attempts direct enrichment if LinkedIn URL or email is provided; otherwise,
    performs a name-based search. Updates the user_properties dictionary in place.

    Parameters:
    - input_user_properties (Dict[str, Any]): A dictionary with user details.
    - tool_config (List[Dict], optional): Apollo tool configuration.

    Returns:
    - Dict[str, Any]: Updated input_user_properties with enriched data from Apollo.
    """
    logger.info("Entering enrich_user_info_with_apollo")

    # Nothing to enrich without an input record.
    if not input_user_properties:
        logger.warning("No input_user_properties provided; returning empty dict.")
        return {}

    linkedin_url = input_user_properties.get("user_linkedin_url", "")
    email = input_user_properties.get("email", "")
    # Holds the raw Apollo enrichment response; stays None if every lookup fails.
    user_data_from_apollo = None

    logger.debug(f"Properties => LinkedIn URL: {linkedin_url}, Email: {email}")

    # If LinkedIn url or email is present, attempt direct enrichment
    if linkedin_url or email:
        try:
            user_data_from_apollo = await enrich_person_info_from_apollo(
                linkedin_url=linkedin_url,
                email=email,
                tool_config=tool_config
            )
        except Exception:
            # Best-effort: log and fall through; the "not found" path below handles it.
            logger.exception("Exception occurred while enriching person info from Apollo by LinkedIn or email.")
    else:
        # Fallback to name-based lookup
        first_name = input_user_properties.get("first_name", "")
        last_name = input_user_properties.get("last_name", "")
        full_name = input_user_properties.get("full_name", f"{first_name} {last_name}").strip()
        company = input_user_properties.get("organization_name", "")

        # Without any name at all there is nothing to search on.
        if not full_name:
            logger.warning("No full_name or (first_name + last_name) provided.")
            input_user_properties["found_user_in_apollo"] = False
            return input_user_properties

        logger.debug(f"Looking up Apollo by name: {full_name}, company: {company}")
        try:
            search_result = await lookup_person_in_apollo_by_name(
                full_name=full_name,
                company_name=company,
                tool_config=tool_config
            )

            # Extract people and contacts from the search result
            people = search_result.get("people", [])
            contacts = search_result.get("contacts", [])
            results = people + contacts
            logger.info(f"Name-based lookup returned {len(results)} results from Apollo.")

            for person in results:
                person_name = person.get("name", "").lower()
                person_first_name = person.get("first_name", "").lower()
                person_last_name = person.get("last_name", "").lower()
                person_company = (person.get("organization", {}) or {}).get("name", "").lower()

                # Match the full name or first/last name and company
                if (
                    (person_name == full_name.lower() or
                     (person_first_name == first_name.lower() and person_last_name == last_name.lower()))
                    and (not company or person_company == company.lower())
                ):
                    logger.info(f"Found matching person {person.get('name')} in Apollo. Enriching data.")
                    linkedin_url = person.get("linkedin_url", "")
                    if linkedin_url:
                        # Second stage: enrich via the LinkedIn URL found in the match.
                        try:
                            user_data_from_apollo = await enrich_person_info_from_apollo(
                                linkedin_url=linkedin_url,
                                tool_config=tool_config
                            )
                        except Exception:
                            logger.exception("Exception occurred during second stage Apollo enrichment.")
                    # Stop at the first match that yields enrichment data.
                    if user_data_from_apollo:
                        break
        except Exception:
            logger.exception("Exception occurred while performing name-based lookup in Apollo.")

    # No enrichment data from either path: flag and return unchanged input.
    if not user_data_from_apollo:
        logger.debug("No user data returned from Apollo.")
        input_user_properties["found_user_in_apollo"] = False
        return input_user_properties

    # At this point, user_data_from_apollo likely has "person" key
    person_data = user_data_from_apollo.get("person", {})
    additional_props = input_user_properties.get("additional_properties") or {}

    # Fill missing contact info if not already present
    if not input_user_properties.get("email"):
        input_user_properties["email"] = person_data.get("email", "")
    if not input_user_properties.get("phone"):
        input_user_properties["phone"] = (person_data.get("contact", {}) or {}).get("sanitized_phone", "")

    # Map fields (Apollo values overwrite existing ones when present)
    if person_data.get("name"):
        input_user_properties["full_name"] = person_data["name"]
    if person_data.get("first_name"):
        input_user_properties["first_name"] = person_data["first_name"]
    if person_data.get("last_name"):
        input_user_properties["last_name"] = person_data["last_name"]
    if person_data.get("linkedin_url"):
        input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]

    # Organization-level fields, copied only when Apollo supplies them.
    if person_data.get("organization"):
        org_data = person_data["organization"] or {}
        if org_data.get("primary_domain"):
            input_user_properties["primary_domain_of_organization"] = org_data["primary_domain"]
        if org_data.get("name"):
            input_user_properties["organization_name"] = org_data["name"]
        if org_data.get("linkedin_url"):
            input_user_properties["organization_linkedin_url"] = org_data["linkedin_url"]
        if org_data.get("website_url"):
            input_user_properties["organization_website"] = org_data["website_url"]
        if org_data.get("keywords"):
            input_user_properties["keywords"] = ", ".join(org_data["keywords"])

    if person_data.get("title"):
        input_user_properties["job_title"] = person_data["title"]
    if person_data.get("headline"):
        input_user_properties["headline"] = person_data["headline"]
        # If there's no summary_about_lead, reuse the person's headline
        if not input_user_properties.get("summary_about_lead"):
            input_user_properties["summary_about_lead"] = person_data["headline"]

    # Derive location (avoid literal "None")
    city = person_data.get("city")
    state = person_data.get("state")
    parts = []
    for value in (city, state):
        if value is None:
            continue
        s = str(value).strip()
        # Skip empty strings and the literal string "none" from the API.
        if not s or s.lower() == "none":
            continue
        parts.append(s)
    lead_location = ", ".join(parts)
    if lead_location:
        input_user_properties["lead_location"] = lead_location

    # Verify name match: found_user_in_apollo is set only on an exact
    # first+last name match between input and the enriched record.
    first_matched = bool(
        input_user_properties.get("first_name")
        and person_data.get("first_name") == input_user_properties["first_name"]
    )
    last_matched = bool(
        input_user_properties.get("last_name")
        and person_data.get("last_name") == input_user_properties["last_name"]
    )
    if first_matched and last_matched:
        logger.info("Matching user found and data enriched from Apollo.")
        input_user_properties["found_user_in_apollo"] = True

    # Preserve the raw (cleaned) Apollo record for downstream consumers.
    person_data = cleanup_properties(person_data)
    additional_props["apollo_person_data"] = json.dumps(person_data)
    input_user_properties["additional_properties"] = additional_props

    return input_user_properties
|
|
1206
|
+
|
|
1207
|
+
|
|
1208
|
+
async def search_companies_with_apollo(
    tool_config: Optional[List[Dict[str, Any]]] = None,
    dynamic_payload: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """
    Search for companies using Apollo's organizations/search endpoint.

    Args:
        tool_config: Apollo API configuration
        dynamic_payload: Search parameters for the API call

    Returns:
        List of company/organization dictionaries
    """
    logger.info("Entering search_companies_with_apollo")

    if not dynamic_payload:
        logger.warning("No payload given; returning empty result.")
        return []

    token, is_oauth = get_apollo_access_token(tool_config)

    # Auth header differs between OAuth bearer tokens and plain API keys.
    auth_header = (
        {"Authorization": f"Bearer {token}"} if is_oauth else {"X-Api-Key": token}
    )
    request_headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
        **auth_header,
    }

    endpoint = "https://api.apollo.io/api/v1/organizations/search"
    logger.info(f"Sending payload to Apollo organizations endpoint (single page): {json.dumps(dynamic_payload, indent=2)}")

    async with aiohttp.ClientSession() as http_session:
        response_data = await fetch_apollo_data(http_session, endpoint, request_headers, dynamic_payload)
        if not response_data:
            logger.error("No data returned from Apollo organizations search.")
            return []

        # Apollo sometimes returns accounts as well
        return response_data.get("organizations", []) + response_data.get("accounts", [])
|
|
1250
|
+
|
|
1251
|
+
|
|
1252
|
+
def fill_in_company_properties(company_data: dict) -> dict:
    """
    Convert Apollo company/organization data into a standardized format.

    Args:
        company_data: Raw company data from Apollo API

    Returns:
        Dictionary with standardized company properties
    """
    company_properties = {}

    # Basic company information
    company_properties["organization_name"] = company_data.get("name", "")
    company_properties["primary_domain"] = company_data.get("primary_domain", "")
    company_properties["website_url"] = company_data.get("website_url", "")
    company_properties["organization_linkedin_url"] = company_data.get("linkedin_url", "")

    # Location information
    company_properties["organization_city"] = company_data.get("city", "")
    company_properties["organization_state"] = company_data.get("state", "")
    company_properties["organization_country"] = company_data.get("country", "")

    # Create a combined location string
    location_parts = [
        company_data.get("city", ""),
        company_data.get("state", ""),
        company_data.get("country", "")
    ]
    company_properties["organization_location"] = ", ".join([part for part in location_parts if part])

    # Company size and financial info
    company_properties["employee_count"] = company_data.get("estimated_num_employees", 0)
    company_properties["annual_revenue"] = company_data.get("annual_revenue", 0)

    # Industry and business info
    company_properties["industry"] = company_data.get("industry", "")
    # Fix: "or []" guards against an explicit null in the API response,
    # which .get("keywords", []) would NOT catch and would crash join().
    company_properties["keywords"] = ", ".join(company_data.get("keywords") or [])
    company_properties["description"] = company_data.get("description", "")

    # Funding and growth
    company_properties["founded_year"] = company_data.get("founded_year", "")
    company_properties["funding_stage"] = company_data.get("latest_funding_stage", "")
    company_properties["total_funding"] = company_data.get("total_funding", 0)

    # Technology stack (same null-safety as keywords above)
    tech_stack = company_data.get("technology_names") or []
    if tech_stack:
        company_properties["technology_stack"] = ", ".join(tech_stack)

    # Apollo-specific IDs
    company_properties["apollo_organization_id"] = company_data.get("id", "")

    # Additional metadata
    company_properties["phone"] = company_data.get("phone", "")
    company_properties["facebook_url"] = company_data.get("facebook_url", "")
    company_properties["twitter_url"] = company_data.get("twitter_url", "")

    # Store raw data for reference
    company_properties["additional_properties"] = {
        "apollo_organization_data": json.dumps(cleanup_properties(company_data))
    }

    return company_properties
|
|
1316
|
+
|
|
1317
|
+
|
|
1318
|
+
def _apollo_company_payload_from_url(
    example_url: str,
    page: Optional[int],
    per_page: Optional[int],
) -> Dict[str, Any]:
    """Build an Apollo organization-search payload by parsing an Apollo UI URL.

    Args:
        example_url: An Apollo web-app URL whose query parameters encode the
            desired company search filters.
        page: Page number to seed into the payload.
        per_page: Page size to seed into the payload.

    Returns:
        Dict of Apollo API parameters (snake_case keys) derived from the URL.
    """
    parsed_url = urlparse(example_url)
    query_string = parsed_url.query

    # Apollo app URLs often carry the query string inside the fragment
    # (e.g. https://app.apollo.io/#/companies?...), so fall back to it.
    if not query_string and "?" in parsed_url.fragment:
        query_string = parsed_url.fragment.split("?", 1)[1]

    query_params = parse_qs(query_string)

    payload: Dict[str, Any] = {
        "page": page,
        "per_page": per_page,
    }

    # Organization-specific URL parameter mapping (UI camelCase -> API snake_case).
    mapping = {
        "organizationLocations": "organization_locations",
        "organizationNumEmployeesRanges": "organization_num_employees_ranges",
        "organizationIndustries": "organization_industries",
        "organizationIndustryTagIds": "organization_industry_tag_ids",
        "qKeywords": "q_keywords",
        "qOrganizationDomains": "q_organization_domains",
        "sortAscending": "sort_ascending",
        "sortByField": "sort_by_field",
        "organizationLatestFundingStageCd": "organization_latest_funding_stage_cd",
        "revenueRange[max]": "revenue_range_max",
        "revenueRange[min]": "revenue_range_min",
        "currentlyUsingAnyOfTechnologyUids": "currently_using_any_of_technology_uids",
        "organizationIds": "organization_ids",
        "notOrganizationIds": "not_organization_ids",
        "qOrganizationSearchListId": "q_organization_search_list_id",
        "qNotOrganizationSearchListId": "q_not_organization_search_list_id",
    }

    # Keys whose API form must always be a list, even for a single value.
    list_keys = (
        "organization_locations",
        "organization_industries",
        "organization_industry_tag_ids",
        "q_organization_domains",
        "q_organization_keyword_tags",
        "organization_ids",
        "not_organization_ids",
        "organization_num_employees_ranges",
        "currently_using_any_of_technology_uids",
        "organization_latest_funding_stage_cd",
    )

    for raw_key, raw_value_list in query_params.items():
        key = raw_key[:-2] if raw_key.endswith("[]") else raw_key

        if raw_key in mapping:
            key = mapping[raw_key]
        elif key in mapping:
            key = mapping[key]
        else:
            # Fallback: generic camelCase -> snake_case conversion.
            key = re.sub(r"(?<!^)(?=[A-Z])", "_", key).lower()

        final_value: Union[str, List[str]]
        final_value = raw_value_list[0] if len(raw_value_list) == 1 else raw_value_list

        if key in ("sort_ascending",):
            # Coerce common truthy string spellings into a real boolean.
            final_value = str(final_value).lower() in ("true", "1", "yes")

        if key in ("page", "per_page", "revenue_range_min", "revenue_range_max"):
            try:
                final_value = int(final_value)
            except (ValueError, TypeError):
                # int() raises TypeError for list values (multi-valued param);
                # the original caught only ValueError, which could crash here.
                pass

        if key == "q_organization_keyword_tags":
            # Normalize to a flat list of tags, splitting comma-separated
            # strings and flattening comma-separated items inside lists.
            if isinstance(final_value, str):
                final_value = [tag.strip() for tag in final_value.split(",") if tag.strip()]
            elif isinstance(final_value, list):
                flattened = []
                for item in final_value:
                    if isinstance(item, str) and "," in item:
                        flattened.extend(
                            [tag.strip() for tag in item.split(",") if tag.strip()]
                        )
                    else:
                        flattened.append(item)
                final_value = flattened

        # `foo[]` parameters are always lists; certain bare keys must be too.
        if raw_key.endswith("[]"):
            if isinstance(final_value, str):
                final_value = [final_value]
        elif key in list_keys and isinstance(final_value, str):
            final_value = [final_value]

        payload[key] = final_value

    # The Apollo UI encodes "no sorting" as the literal string "[none]".
    if payload.get("sort_by_field") == "[none]":
        payload.pop("sort_by_field")

    return payload


def _apollo_company_payload_from_filters(query: CompanyQueryFilters) -> Dict[str, Any]:
    """Build an Apollo organization-search payload from CompanyQueryFilters.

    Only fields with values are added (Apollo rejects empty arrays).

    Args:
        query: CompanyQueryFilters object containing search criteria.

    Returns:
        Dict of Apollo API parameters derived from the filter object.
    """
    payload: Dict[str, Any] = {}

    if query.organization_locations:
        payload["organization_locations"] = query.organization_locations
    if query.organization_industries:
        payload["organization_industries"] = query.organization_industries
    if query.organization_industry_tag_ids:
        payload["organization_industry_tag_ids"] = query.organization_industry_tag_ids

    # Employee ranges: prefer explicit ranges; otherwise derive a single
    # "min,max" range from the individual bounds (defaults 1 and 1000).
    employee_ranges = []
    if query.organization_num_employees_ranges:
        employee_ranges = query.organization_num_employees_ranges
    elif query.min_employees or query.max_employees:
        employee_ranges = [f"{query.min_employees or 1},{query.max_employees or 1000}"]
    if employee_ranges:
        payload["organization_num_employees_ranges"] = employee_ranges

    # Normalize q_keywords into a list of tags for the company search.
    # Initialized to [] so the q_organization_keyword_tags branch below can
    # never hit a NameError when q_keywords is unset (original bug); an empty
    # list is later stripped by the payload cleanup, preserving behavior.
    keyword_tags: List[str] = []
    if query.q_keywords:
        if isinstance(query.q_keywords, str):
            keyword_tags = [tag.strip() for tag in query.q_keywords.split(",") if tag.strip()]
        else:
            keyword_tags = query.q_keywords

    if query.q_organization_keyword_tags:
        # NOTE(review): mirrors the original behavior of sending the tags
        # derived from q_keywords (not q_organization_keyword_tags itself).
        payload["q_organization_keyword_tags"] = keyword_tags

    if query.q_not_organization_keyword_tags:
        payload["q_not_organization_keyword_tags"] = query.q_not_organization_keyword_tags

    if query.q_organization_domains:
        payload["q_organization_domains"] = query.q_organization_domains
    if query.revenue_range_min is not None:
        payload["revenue_range_min"] = query.revenue_range_min
    if query.revenue_range_max is not None:
        payload["revenue_range_max"] = query.revenue_range_max
    if query.organization_latest_funding_stage_cd:
        payload["organization_latest_funding_stage_cd"] = query.organization_latest_funding_stage_cd
    if query.currently_using_any_of_technology_uids:
        payload["currently_using_any_of_technology_uids"] = query.currently_using_any_of_technology_uids
    if query.organization_ids:
        payload["organization_ids"] = query.organization_ids
    if query.not_organization_ids:
        payload["not_organization_ids"] = query.not_organization_ids
    if query.q_organization_search_list_id:
        payload["q_organization_search_list_id"] = query.q_organization_search_list_id
    if query.q_not_organization_search_list_id:
        payload["q_not_organization_search_list_id"] = query.q_not_organization_search_list_id
    if query.sort_by_field is not None:
        payload["sort_by_field"] = query.sort_by_field
    if query.sort_ascending is not None:
        payload["sort_ascending"] = query.sort_ascending

    return payload


@assistant_tool
async def search_companies_with_apollo_page(
    query: CompanyQueryFilters,
    page: Optional[int] = 1,
    per_page: Optional[int] = 25,
    example_url: Optional[str] = None,
    tool_config: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """
    Fetch a single page of Apollo companies using ``page`` and ``per_page``.

    This helper performs one request to the Apollo API and returns the fetched
    companies along with comprehensive pagination metadata.

    Args:
        query: CompanyQueryFilters object containing search criteria
        page: Page number to fetch (1-indexed, defaults to 1)
        per_page: Number of results per page (defaults to 25)
        example_url: Optional URL to parse search parameters from
        tool_config: Optional tool configuration for API keys

    Returns:
        Dict containing:
        - current_page: The current page number
        - per_page: Number of results per page
        - total_entries: Total number of results available
        - total_pages: Total number of pages available
        - has_next_page: Boolean indicating if more pages exist
        - next_page: Next page number (None if no more pages)
        - results: List of company dictionaries for this page
    """
    logger.info("Entering search_companies_with_apollo_page")

    # A) example_url given -> parse it; B) otherwise build from `query`.
    if example_url:
        dynamic_payload = _apollo_company_payload_from_url(example_url, page, per_page)
    else:
        dynamic_payload = _apollo_company_payload_from_filters(query)

    # Remove sorting parameters that may not be supported by the
    # organizations endpoint.
    dynamic_payload.pop("sort_by_field", None)
    dynamic_payload.pop("sort_ascending", None)

    page_payload = dict(dynamic_payload)
    page_payload["page"] = page
    page_payload["per_page"] = per_page

    # Clean up the payload - drop None values and empty lists, which the
    # Apollo API rejects.
    cleaned_payload = {
        key: value
        for key, value in page_payload.items()
        if value is not None and (not isinstance(value, list) or value)
    }

    # Ensure page and per_page are always included.
    cleaned_payload["page"] = page
    cleaned_payload["per_page"] = per_page

    # Lazy %-style args so formatting only happens when the level is enabled
    # (replaces the previous stdout print for consistency with the module).
    logger.info(
        "Fetching Apollo companies page %s with per_page %s. Payload: %s",
        page,
        per_page,
        json.dumps(cleaned_payload, indent=2),
    )

    # Resolve credentials; OAuth tokens go in Authorization, API keys in X-Api-Key.
    token, is_oauth = get_apollo_access_token(tool_config)
    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }
    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
    else:
        headers["X-Api-Key"] = token

    url = "https://api.apollo.io/api/v1/organizations/search"

    async with aiohttp.ClientSession() as session:
        apollo_response = await fetch_apollo_data(session, url, headers, cleaned_payload)
        if not apollo_response:
            # Empty/failed response: return an empty page with the full
            # metadata shape promised by the docstring ("next_page" was
            # missing from this branch in the original).
            return {
                "current_page": page,
                "per_page": per_page,
                "total_entries": 0,
                "total_pages": 0,
                "has_next_page": False,
                "next_page": None,
                "results": []
            }

        # Extract pagination metadata, falling back to the requested values.
        pagination = apollo_response.get("pagination", {})
        current_page = pagination.get("page", page)
        total_entries = pagination.get("total_entries", 0)
        total_pages = pagination.get("total_pages", 0)
        per_page_actual = pagination.get("per_page", per_page)

        # Determine if there are more pages.
        has_next_page = current_page < total_pages

        # Apollo may return matches under "organizations" and/or "accounts".
        page_results = (
            apollo_response.get("organizations", [])
            + apollo_response.get("accounts", [])
        )

        # Convert each raw record into the standardized company dict shape.
        companies: List[Dict[str, Any]] = [
            fill_in_company_properties(company_data) for company_data in page_results
        ]

        logger.info(
            "Converted %d Apollo company records into standardized dictionaries "
            "(single page mode). Page %s of %s",
            len(companies),
            current_page,
            total_pages,
        )

        return {
            "current_page": current_page,
            "per_page": per_page_actual,
            "total_entries": total_entries,
            "total_pages": total_pages,
            "has_next_page": has_next_page,
            "next_page": current_page + 1 if has_next_page else None,
            "results": companies
        }
|