dhisana 0.0.1.dev243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/__init__.py +1 -0
- dhisana/cli/__init__.py +1 -0
- dhisana/cli/cli.py +20 -0
- dhisana/cli/datasets.py +27 -0
- dhisana/cli/models.py +26 -0
- dhisana/cli/predictions.py +20 -0
- dhisana/schemas/__init__.py +1 -0
- dhisana/schemas/common.py +399 -0
- dhisana/schemas/sales.py +965 -0
- dhisana/ui/__init__.py +1 -0
- dhisana/ui/components.py +472 -0
- dhisana/utils/__init__.py +1 -0
- dhisana/utils/add_mapping.py +352 -0
- dhisana/utils/agent_tools.py +51 -0
- dhisana/utils/apollo_tools.py +1597 -0
- dhisana/utils/assistant_tool_tag.py +4 -0
- dhisana/utils/built_with_api_tools.py +282 -0
- dhisana/utils/cache_output_tools.py +98 -0
- dhisana/utils/cache_output_tools_local.py +78 -0
- dhisana/utils/check_email_validity_tools.py +717 -0
- dhisana/utils/check_for_intent_signal.py +107 -0
- dhisana/utils/check_linkedin_url_validity.py +209 -0
- dhisana/utils/clay_tools.py +43 -0
- dhisana/utils/clean_properties.py +135 -0
- dhisana/utils/company_utils.py +60 -0
- dhisana/utils/compose_salesnav_query.py +259 -0
- dhisana/utils/compose_search_query.py +759 -0
- dhisana/utils/compose_three_step_workflow.py +234 -0
- dhisana/utils/composite_tools.py +137 -0
- dhisana/utils/dataframe_tools.py +237 -0
- dhisana/utils/domain_parser.py +45 -0
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_parse_helpers.py +132 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +933 -0
- dhisana/utils/extract_email_content_for_llm.py +101 -0
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +426 -0
- dhisana/utils/g2_tools.py +104 -0
- dhisana/utils/generate_content.py +41 -0
- dhisana/utils/generate_custom_message.py +271 -0
- dhisana/utils/generate_email.py +278 -0
- dhisana/utils/generate_email_response.py +465 -0
- dhisana/utils/generate_flow.py +102 -0
- dhisana/utils/generate_leads_salesnav.py +303 -0
- dhisana/utils/generate_linkedin_connect_message.py +224 -0
- dhisana/utils/generate_linkedin_response_message.py +317 -0
- dhisana/utils/generate_structured_output_internal.py +462 -0
- dhisana/utils/google_custom_search.py +267 -0
- dhisana/utils/google_oauth_tools.py +727 -0
- dhisana/utils/google_workspace_tools.py +1294 -0
- dhisana/utils/hubspot_clearbit.py +96 -0
- dhisana/utils/hubspot_crm_tools.py +2440 -0
- dhisana/utils/instantly_tools.py +149 -0
- dhisana/utils/linkedin_crawler.py +168 -0
- dhisana/utils/lusha_tools.py +333 -0
- dhisana/utils/mailgun_tools.py +156 -0
- dhisana/utils/mailreach_tools.py +123 -0
- dhisana/utils/microsoft365_tools.py +455 -0
- dhisana/utils/openai_assistant_and_file_utils.py +267 -0
- dhisana/utils/openai_helpers.py +977 -0
- dhisana/utils/openapi_spec_to_tools.py +45 -0
- dhisana/utils/openapi_tool/__init__.py +1 -0
- dhisana/utils/openapi_tool/api_models.py +633 -0
- dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
- dhisana/utils/openapi_tool/openapi_tool.py +319 -0
- dhisana/utils/parse_linkedin_messages_txt.py +100 -0
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +1226 -0
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/python_function_to_tools.py +83 -0
- dhisana/utils/research_lead.py +176 -0
- dhisana/utils/sales_navigator_crawler.py +1103 -0
- dhisana/utils/salesforce_crm_tools.py +477 -0
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +162 -0
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +852 -0
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +582 -0
- dhisana/utils/test_connect.py +2087 -0
- dhisana/utils/trasform_json.py +173 -0
- dhisana/utils/web_download_parse_tools.py +189 -0
- dhisana/utils/workflow_code_model.py +5 -0
- dhisana/utils/zoominfo_tools.py +357 -0
- dhisana/workflow/__init__.py +1 -0
- dhisana/workflow/agent.py +18 -0
- dhisana/workflow/flow.py +44 -0
- dhisana/workflow/task.py +43 -0
- dhisana/workflow/test.py +90 -0
- dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
- dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
- dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
- dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
- dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1226 @@
import asyncio
import json
import logging
import os
import re
import aiohttp
import backoff
from typing import Any, Dict, List, Optional

from dhisana.utils.assistant_tool_tag import assistant_tool
from dhisana.utils.cache_output_tools import cache_output, retrieve_output
from dhisana.utils.clean_properties import cleanup_properties
from dhisana.utils.search_router import search_google_with_tools
from urllib.parse import urlparse, urlunparse

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_proxycurl_access_token(tool_config: Optional[List[Dict]] = None) -> str:
    """
    Retrieves the PROXY_CURL_API_KEY access token from the provided tool configuration.

    Raises:
        ValueError: If the Proxycurl integration has not been configured.
    """
    PROXY_CURL_API_KEY = None

    if tool_config:
        logger.debug(f"Tool config provided: {tool_config}")
        proxy_curl_config = next(
            (item for item in tool_config if item.get("name") == "proxycurl"), None
        )
        if proxy_curl_config:
            config_map = {
                item["name"]: item["value"]
                for item in proxy_curl_config.get("configuration", [])
                if item
            }
            PROXY_CURL_API_KEY = config_map.get("apiKey")
        else:
            logger.warning("No 'proxycurl' config item found in tool_config.")
    else:
        logger.debug("No tool_config provided or it's None.")

    # Check environment variable if no key found yet
    PROXY_CURL_API_KEY = PROXY_CURL_API_KEY or os.getenv("PROXY_CURL_API_KEY")

    if not PROXY_CURL_API_KEY:
        logger.error("Proxycurl integration is not configured.")
        raise ValueError(
            "Proxycurl integration is not configured. Please configure the connection to Proxycurl in Integrations."
        )

    return PROXY_CURL_API_KEY
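A minimal usage sketch for the token resolver above; the config shape and key value are illustrative, mirroring the name/configuration/apiKey fields the function actually reads:

    # Hypothetical config; only the keys read by get_proxycurl_access_token are shown.
    tool_config = [
        {
            "name": "proxycurl",
            "configuration": [{"name": "apiKey", "value": "pc_live_xxx"}],
        }
    ]
    token = get_proxycurl_access_token(tool_config)  # -> "pc_live_xxx"
    token = get_proxycurl_access_token(None)         # falls back to $PROXY_CURL_API_KEY, else raises ValueError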
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def enrich_person_info_from_proxycurl(
    linkedin_url: Optional[str] = None,
    email: Optional[str] = None,
    phone: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None
) -> Dict:
    """
    Fetch a person's details from Proxycurl using LinkedIn URL, email, or phone number.

    Returns:
        dict: JSON response containing person information or an error.
    """
    logger.info("Entering enrich_person_info_from_proxycurl")

    try:
        API_KEY = get_proxycurl_access_token(tool_config)
    except ValueError as e:
        return {"error": str(e)}

    HEADERS = {
        'Authorization': f'Bearer {API_KEY}',
        'Content-Type': 'application/json'
    }

    if not linkedin_url:
        logger.warning("No linkedin_url provided.")
        return {'error': "linkedin_url must be provided"}

    # Check cache if linkedin_url is provided
    if linkedin_url:
        cached_response = retrieve_output("enrich_person_info_from_proxycurl", linkedin_url)
        if cached_response is not None and cached_response.get('error') is None:
            logger.info(f"Cache hit for LinkedIn URL: {linkedin_url}")
            return cached_response

    params = {}
    if linkedin_url:
        params['url'] = linkedin_url
    if email:
        params['email'] = email
    if phone:
        params['phone'] = phone

    url = 'https://enrichlayer.com/api/v2/profile'
    logger.debug(f"Making request to Proxycurl with params: {params}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, headers=HEADERS, params=params) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    if linkedin_url:
                        cache_output("enrich_person_info_from_proxycurl", linkedin_url, result)
                    logger.info("Successfully retrieved person info from Proxycurl.")
                    return result
                elif response.status == 404:
                    msg = "Person not found"
                    logger.warning(msg)
                    return {'error': msg}
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    # Sleep and then return an error (no raise)
                    await asyncio.sleep(30)
                    return {'error': msg}
                else:
                    error_text = await response.text()
                    logger.error(f"Error from Proxycurl: {error_text}")
                    return {'error': error_text}
        except Exception as e:
            logger.exception("Exception occurred while fetching person info from Proxycurl.")
            return {"error": str(e)}
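A call sketch for the person-enrichment coroutine above; the profile URL is a placeholder:

    async def demo_person(tool_config):
        person = await enrich_person_info_from_proxycurl(
            linkedin_url="https://www.linkedin.com/in/some-profile/",  # illustrative URL
            tool_config=tool_config,
        )
        # Errors come back as {'error': ...} rather than raising.
        return None if person.get("error") else person.get("full_name")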
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def lookup_person_in_proxy_curl_by_name(
    first_name: str,
    last_name: str,
    company_name: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
) -> Dict:
    """
    Look up a person in Proxycurl by first and last name, and optionally a company name.

    Returns:
        dict: JSON response containing search results or an error.
    """
    logger.info("Entering lookup_person_in_proxy_curl_by_name")

    if not first_name or not last_name:
        logger.warning("First name or last name missing for lookup.")
        return {'error': "Full name is required"}

    try:
        API_KEY = get_proxycurl_access_token(tool_config)
    except ValueError as e:
        return {"error": str(e)}

    headers = {'Authorization': f'Bearer {API_KEY}'}
    params = {
        'first_name': first_name,
        'last_name': last_name,
        'page_size': '1',
    }
    if company_name:
        params['current_company_name'] = company_name

    key = f"{first_name} {last_name} {company_name}".strip()
    if key:
        cached_response = retrieve_output("lookup_person_in_proxycurl_by_name", key)
        if cached_response is not None:
            logger.info(f"Cache hit for name lookup key: {key}")
            return cached_response

    url = 'https://enrichlayer.com/api/v2/search/person'
    logger.debug(f"Making request to Proxycurl with params: {params}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(url, headers=headers, params=params) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    cache_output("lookup_person_in_proxycurl_by_name", key, result)
                    logger.info("Successfully retrieved person search info from Proxycurl.")
                    return result
                elif response.status == 404:
                    msg = "Person not found"
                    logger.warning(msg)
                    if key:
                        cache_output("lookup_person_in_proxycurl_by_name", key, {'error': msg})
                    return {'error': msg}
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    await asyncio.sleep(30)
                    return {'error': msg}
                else:
                    result = await response.json()
                    logger.warning(f"lookup_person_in_proxycurl_by_name error: {result}")
                    return {'error': result}
        except Exception as e:
            logger.exception("Exception occurred while looking up person by name.")
            return {"error": str(e)}
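A sketch of the name-based lookup; names and company are placeholders, and reading matches from the "results" key mirrors how this module consumes the payload later in enrich_user_info_with_proxy_curl:

    async def find_jane(tool_config):
        result = await lookup_person_in_proxy_curl_by_name(
            first_name="Jane", last_name="Doe",   # placeholder names
            company_name="Acme Corp",             # optional narrowing
            tool_config=tool_config,
        )
        return [] if result.get("error") else result.get("results", [])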
def transform_company_data(data: dict) -> dict:
    """
    Transform the company data by mapping:
      - 'name' to 'organization_name'
      - 'website' to 'organization_website'
      - 'industry' to 'organization_industry'
      - 'hq' or 'headquarters' to 'organization_hq_location'
        in the format "city, state, country" (skipping empty parts).
    Copies over all other properties except the ones that are mapped.
    If data is empty, returns an empty dictionary.
    """
    if not data:
        return {}

    transformed = {}

    # Map name, website, and industry
    if "name" in data:
        transformed["organization_name"] = data["name"]
    if "website" in data:
        transformed["organization_website"] = data["website"]
    if "industry" in data:
        transformed["organization_industry"] = data["industry"]

    if "company_size" in data:
        transformed["company_size_list"] = data["company_size"]

    if "company_size_on_linkedin" in data:
        transformed["organization_size"] = data["company_size_on_linkedin"]
        transformed["company_size"] = data["company_size_on_linkedin"]

    # Determine headquarters info from "hq" or "headquarters"
    hq_data = data.get("hq") or data.get("headquarters")
    if hq_data:
        if isinstance(hq_data, dict):
            city = hq_data.get("city", "")
            state = hq_data.get("geographic_area", "")
            country = hq_data.get("country", "")
            # Join non-empty parts with a comma and a space
            parts = [part for part in (city, state, country) if part]
            transformed["organization_hq_location"] = ", ".join(parts)
        else:
            # If hq_data is not a dict, assume it's already in the desired format
            transformed["organization_hq_location"] = hq_data

    # Copy all other properties, excluding those already mapped
    for key, value in data.items():
        if key not in ("name", "website", "industry", "hq", "headquarters", "company_size"):
            transformed[key] = value

    return transformed
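A worked example of the mapping above, with an illustrative payload:

    raw = {
        "name": "Acme Corp",                # illustrative input
        "website": "https://acme.example",
        "industry": "Software",
        "hq": {"city": "Austin", "geographic_area": "TX", "country": "US"},
        "company_size_on_linkedin": 250,
        "follower_count": 9000,
    }
    out = transform_company_data(raw)
    # out["organization_name"] == "Acme Corp"
    # out["organization_hq_location"] == "Austin, TX, US"
    # out["organization_size"] == out["company_size"] == 250
    # Unmapped keys such as "follower_count" are copied through unchanged.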
def _build_company_profile_params(
    company_url: str,
    profile_flags: Dict[str, Optional[str]],
) -> Dict[str, str]:
    """
    Build request params for the Enrichlayer company profile endpoint,
    ensuring we only forward flags that were explicitly provided.
    """
    params: Dict[str, str] = {'url': company_url}
    for key, value in profile_flags.items():
        if value is not None:
            params[key] = value
    return params


def _build_company_cache_key(identifier: str, profile_flags: Dict[str, Optional[str]]) -> str:
    """
    Builds a cache key that is unique for the combination of identifier
    (LinkedIn URL or domain) and the optional enrichment flags.
    """
    suffix_bits = [
        f"{key}={value}"
        for key, value in sorted(profile_flags.items())
        if value is not None
    ]
    if suffix_bits:
        return f"{identifier}|{'&'.join(suffix_bits)}"
    return identifier


def _bool_to_include_exclude(value: Optional[bool]) -> Optional[str]:
    """
    Convert a boolean flag into the string literals expected by Proxycurl.
    True -> "include", False -> "exclude", None -> None (omit parameter).
    """
    if value is None:
        return None
    return "include" if value else "exclude"
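How the three helpers compose, using illustrative flag values:

    flags = {
        "funding_data": _bool_to_include_exclude(True),  # -> "include"
        "categories": _bool_to_include_exclude(None),    # -> None, so omitted
        "use_cache": "if-present",
    }
    _build_company_profile_params("https://www.linkedin.com/company/acme/", flags)
    # {'url': 'https://www.linkedin.com/company/acme/',
    #  'funding_data': 'include', 'use_cache': 'if-present'}
    _build_company_cache_key("https://www.linkedin.com/company/acme/", flags)
    # 'https://www.linkedin.com/company/acme/|funding_data=include&use_cache=if-present'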
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def enrich_organization_info_from_proxycurl(
    organization_domain: Optional[str] = None,
    organization_linkedin_url: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None,
    categories: Optional[bool] = None,
    funding_data: Optional[bool] = None,
    exit_data: Optional[bool] = None,
    acquisitions: Optional[bool] = None,
    extra: Optional[bool] = None,
    use_cache: Optional[str] = "if-present",
    fallback_to_cache: Optional[str] = "on-error",
) -> Dict:
    """
    Fetch an organization's details from Proxycurl using either the organization domain or LinkedIn URL.
    Additional keyword parameters map directly to the Enrichlayer Company Profile endpoint.

    Args:
        organization_domain: Organization's domain name to resolve via Proxycurl.
        organization_linkedin_url: LinkedIn company profile URL.
        tool_config: Optional tool configuration metadata for credential lookup.
        categories/funding_data/exit_data/acquisitions/extra: Set True to request
            "include", False for "exclude", or None to omit.
        use_cache: Controls Proxycurl caching behaviour (e.g. "if-present").
        fallback_to_cache: Controls Proxycurl cache fallback behaviour (e.g. "on-error").

    Returns:
        dict: Transformed JSON response containing organization information,
        or {'error': ...} on error, or an empty dict if not found.
    """
    logger.info("Entering enrich_organization_info_from_proxycurl")

    try:
        API_KEY = get_proxycurl_access_token(tool_config)
    except ValueError as e:
        return {"error": str(e)}

    HEADERS = {
        'Authorization': f'Bearer {API_KEY}',
        'Content-Type': 'application/json'
    }

    if not organization_domain and not organization_linkedin_url:
        logger.warning("No organization domain or LinkedIn URL provided.")
        return {}

    profile_flags: Dict[str, Optional[str]] = {
        "categories": _bool_to_include_exclude(categories),
        "funding_data": _bool_to_include_exclude(funding_data),
        "exit_data": _bool_to_include_exclude(exit_data),
        "acquisitions": _bool_to_include_exclude(acquisitions),
        "extra": _bool_to_include_exclude(extra),
        "use_cache": use_cache,
        "fallback_to_cache": fallback_to_cache,
    }

    # If LinkedIn URL is provided, standardize it and fetch data
    if organization_linkedin_url:
        logger.debug(f"Organization LinkedIn URL provided: {organization_linkedin_url}")
        if "linkedin.com/company" not in organization_linkedin_url:
            logger.warning(f"Invalid LinkedIn URL provided: {organization_linkedin_url}")
            return {}
        parsed_url = urlparse(organization_linkedin_url)
        if parsed_url.netloc != 'www.linkedin.com':
            standardized_netloc = 'www.linkedin.com'
            standardized_path = parsed_url.path
            if not standardized_path.startswith('/company/'):
                standardized_path = '/company' + standardized_path
            standardized_url = urlunparse(
                parsed_url._replace(netloc=standardized_netloc, path=standardized_path)
            )
            if standardized_url and not standardized_url.endswith('/'):
                standardized_url += '/'
        else:
            standardized_url = organization_linkedin_url
            if standardized_url and not standardized_url.endswith('/'):
                standardized_url += '/'

        cache_key = _build_company_cache_key(standardized_url, profile_flags)
        # Check cache for standardized LinkedIn URL
        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", cache_key)
        if cached_response is not None:
            logger.info(f"Cache hit for organization LinkedIn URL: {standardized_url}")
            cached_response = transform_company_data(cached_response)
            return cached_response

        # Fetch details using standardized LinkedIn URL
        url = 'https://enrichlayer.com/api/v2/company'
        params = _build_company_profile_params(standardized_url, profile_flags)
        logger.debug(f"Making request to Proxycurl with params: {params}")

        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(url, headers=HEADERS, params=params) as response:
                    logger.debug(f"Received response status: {response.status}")
                    if response.status == 200:
                        result = await response.json()
                        transformed_result = transform_company_data(result)
                        cache_output("enrich_organization_info_from_proxycurl", cache_key, transformed_result)
                        logger.info("Successfully retrieved and transformed organization info from Proxycurl by LinkedIn URL.")
                        return transformed_result
                    elif response.status == 429:
                        msg = "Rate limit exceeded"
                        logger.warning(msg)
                        await asyncio.sleep(30)
                        return {"error": msg}
                    elif response.status == 404:
                        error_text = await response.text()
                        logger.warning(
                            f"Proxycurl organization profile not found for LinkedIn URL {standardized_url}: {error_text}"
                        )
                        cache_output(
                            "enrich_organization_info_from_proxycurl", cache_key, {}
                        )
                        return {}
                    else:
                        error_text = await response.text()
                        logger.error(
                            f"Error from Proxycurl organization info fetch by URL: {error_text}"
                        )
                        return {}
            except Exception as e:
                logger.exception("Exception occurred while fetching organization info from Proxycurl by LinkedIn URL.")
                return {"error": str(e)}

    # If organization domain is provided, resolve domain to LinkedIn URL and fetch data
    if organization_domain:
        logger.debug(f"Organization domain provided: {organization_domain}")
        domain_cache_key = _build_company_cache_key(organization_domain, profile_flags)
        cached_response = retrieve_output("enrich_organization_info_from_proxycurl", domain_cache_key)
        if cached_response is not None:
            logger.info(f"Cache hit for organization domain: {organization_domain}")
            return cached_response

        resolve_url = 'https://enrichlayer.com/api/v2/company/resolve'
        params = {'domain': organization_domain}
        logger.debug(f"Making request to Proxycurl to resolve domain with params: {params}")

        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(resolve_url, headers=HEADERS, params=params) as response:
                    logger.debug(f"Received response status: {response.status}")
                    if response.status == 200:
                        company_data = await response.json()
                        company_url = company_data.get('url')
                        if company_url:
                            parsed_url = urlparse(company_url)
                            if parsed_url.netloc != 'www.linkedin.com':
                                standardized_netloc = 'www.linkedin.com'
                                standardized_path = parsed_url.path
                                if not standardized_path.startswith('/company/'):
                                    standardized_path = '/company' + standardized_path
                                standardized_url = urlunparse(
                                    parsed_url._replace(netloc=standardized_netloc, path=standardized_path)
                                )
                            else:
                                standardized_url = company_url

                            profile_url = 'https://enrichlayer.com/api/v2/company'
                            try:
                                profile_params = _build_company_profile_params(standardized_url, profile_flags)
                                async with session.get(profile_url, headers=HEADERS, params=profile_params) as profile_response:
                                    logger.debug(f"Received profile response status: {profile_response.status}")
                                    if profile_response.status == 200:
                                        result = await profile_response.json()
                                        transformed_result = transform_company_data(result)
                                        cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, transformed_result)
                                        logger.info("Successfully retrieved and transformed organization info from Proxycurl by domain.")
                                        return transformed_result
                                    elif profile_response.status == 429:
                                        msg = "Rate limit exceeded"
                                        logger.warning(msg)
                                        await asyncio.sleep(30)
                                        return {"error": msg}
                                    else:
                                        error_text = await profile_response.text()
                                        logger.error(f"Error from Proxycurl organization profile fetch by resolved domain: {error_text}")
                                        return {}
                            except Exception as e:
                                logger.exception("Exception occurred while fetching organization profile data.")
                                return {"error": str(e)}
                        else:
                            logger.warning("Company URL not found for the provided domain.")
                            return {}
                    elif response.status == 429:
                        msg = "Rate limit exceeded"
                        logger.warning(msg)
                        await asyncio.sleep(30)
                        return {"error": msg}
                    elif response.status == 404:
                        msg = "Item not found"
                        logger.warning(msg)
                        cache_output("enrich_organization_info_from_proxycurl", domain_cache_key, {})
                        return {}
                    else:
                        error_text = await response.text()
                        logger.error(f"Error from Proxycurl domain resolve: {error_text}")
                        return {}
            except Exception as e:
                logger.exception("Exception occurred while resolving organization domain on Proxycurl.")
                return {"error": str(e)}

    return {}
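A call sketch for the organization path; the company URL is illustrative, and only one of the two identifiers is required:

    async def demo_org(tool_config):
        org = await enrich_organization_info_from_proxycurl(
            organization_linkedin_url="https://www.linkedin.com/company/acme/",  # illustrative
            funding_data=True,   # forwarded as funding_data=include
            tool_config=tool_config,
        )
        return org.get("organization_name"), org.get("organization_hq_location")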
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def enrich_job_info_from_proxycurl(
    job_url: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None
) -> Dict:
    """
    Fetch a job's details from Proxycurl using the job URL.

    Returns:
        dict: JSON response containing job information or an error.
    """
    logger.info("Entering enrich_job_info_from_proxycurl")

    try:
        API_KEY = get_proxycurl_access_token(tool_config)
    except ValueError as e:
        return {"error": str(e)}

    HEADERS = {
        'Authorization': f'Bearer {API_KEY}',
        'Content-Type': 'application/json'
    }

    if not job_url:
        logger.warning("No job URL provided.")
        return {'error': "Job URL must be provided"}

    # Check cache
    cached_response = retrieve_output("enrich_job_info_from_proxycurl", job_url)
    if cached_response is not None:
        logger.info(f"Cache hit for job URL: {job_url}")
        return cached_response

    params = {'url': job_url}
    api_endpoint = 'https://enrichlayer.com/api/v2/job'
    logger.debug(f"Making request to Proxycurl for job info with params: {params}")

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(api_endpoint, headers=HEADERS, params=params) as response:
                logger.debug(f"Received response status: {response.status}")
                if response.status == 200:
                    result = await response.json()
                    cache_output("enrich_job_info_from_proxycurl", job_url, result)
                    logger.info("Successfully retrieved job info from Proxycurl.")
                    return result
                elif response.status == 429:
                    msg = "Rate limit exceeded"
                    logger.warning(msg)
                    await asyncio.sleep(30)
                    return {'error': msg}
                elif response.status == 404:
                    msg = "Job not found"
                    logger.warning(msg)
                    cache_output("enrich_job_info_from_proxycurl", job_url, {'error': msg})
                    return {'error': msg}
                else:
                    error_text = await response.text()
                    logger.error(f"Error from Proxycurl: {error_text}")
                    return {'error': error_text}
        except Exception as e:
            logger.exception("Exception occurred while fetching job info from Proxycurl.")
            return {"error": str(e)}
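A call sketch for the job endpoint; the posting URL is a placeholder:

    async def demo_job(tool_config):
        job = await enrich_job_info_from_proxycurl(
            job_url="https://www.linkedin.com/jobs/view/1234567890/",  # illustrative
            tool_config=tool_config,
        )
        return {} if job.get("error") else job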
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=3,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def search_recent_job_changes(
    job_titles: List[str],
    locations: List[str],
    max_items_to_return: int = 100,
    tool_config: Optional[List[Dict]] = None
) -> List[dict]:
    """
    Search for individuals with specified job titles and locations who have recently changed jobs.

    Returns:
        List[dict]: List of individuals matching the criteria, or an empty list on failure/error.
    """
    logger.info("Entering search_recent_job_changes")

    try:
        API_KEY = get_proxycurl_access_token(tool_config)
    except ValueError as e:
        logger.error(str(e))
        return []

    HEADERS = {
        'Authorization': f'Bearer {API_KEY}',
        'Content-Type': 'application/json'
    }

    url = 'https://enrichlayer.com/api/v2/search/person'
    results = []
    page = 1
    per_page = min(max_items_to_return, 100)

    logger.debug(f"Starting search with job_titles={job_titles}, locations={locations}, max_items={max_items_to_return}")

    async with aiohttp.ClientSession() as session:
        while len(results) < max_items_to_return:
            params = {
                'job_title': ','.join(job_titles),
                'location': ','.join(locations),
                'page': page,
                'num_records': per_page
            }
            logger.debug(f"Request params: {params}")

            try:
                async with session.get(url, headers=HEADERS, params=params) as response:
                    logger.debug(f"Received response status: {response.status}")
                    if response.status == 200:
                        data = await response.json()
                        people = data.get('persons', [])
                        if not people:
                            logger.info("No more people found, ending search.")
                            break
                        results.extend(people)
                        logger.info(f"Fetched {len(people)} results on page {page}. Total so far: {len(results)}")
                        page += 1
                        if len(results) >= max_items_to_return:
                            logger.info("Reached max items limit.")
                            break
                    elif response.status == 429:
                        msg = "Rate limit exceeded"
                        logger.warning(msg)
                        await asyncio.sleep(30)
                        # Without raising, won't trigger another backoff retry
                        # so just continue or break as desired:
                        continue
                    else:
                        error_text = await response.text()
                        logger.error(f"Error while searching recent job changes: {error_text}")
                        break
            except Exception:
                logger.exception("Exception occurred while searching recent job changes.")
                break

    return results[:max_items_to_return]
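A paging sketch for the search above; titles and locations are illustrative:

    async def demo_job_changes(tool_config):
        people = await search_recent_job_changes(
            job_titles=["VP of Sales", "Head of Revenue"],  # illustrative
            locations=["United States"],                    # illustrative
            max_items_to_return=25,
            tool_config=tool_config,
        )
        return len(people)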
@assistant_tool
async def find_matching_job_posting_proxy_curl(
    company_name: str,
    keywords_check: List[str],
    optional_keywords: List[str],
    organization_linkedin_url: Optional[str] = None,
    tool_config: Optional[List[Dict]] = None
) -> List[str]:
    """
    Find job postings on LinkedIn for a given company using Google Custom Search,
    then optionally validate those links with Proxycurl.

    Returns:
        List[str]: A list of matching job posting links.
    """
    logger.info("Entering find_matching_job_posting_proxy_curl")

    if not company_name:
        logger.warning("No company name provided.")
        return []

    if not keywords_check:
        logger.warning("No keywords_check provided, defaulting to an empty list.")
        keywords_check = []

    if not optional_keywords:
        logger.warning("No optional_keywords provided, defaulting to an empty list.")
        optional_keywords = []

    keywords_list = [kw.strip().lower() for kw in keywords_check]
    job_posting_links = []

    # Build the search query
    keywords_str = ' '.join(f'"{kw}"' for kw in keywords_check)
    optional_keywords_str = ' '.join(f'{kw}' for kw in optional_keywords)
    query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str} {optional_keywords_str}'
    logger.debug(f"Google search query: {query}")

    # First Google search attempt
    results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
    if not isinstance(results, list) or len(results) == 0:
        logger.info("No results found. Attempting fallback query without optional keywords.")
        query = f'site:*linkedin.com/jobs/view/ "{company_name}" {keywords_str}'
        results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
        if not isinstance(results, list) or len(results) == 0:
            logger.info("No job postings found in fallback search either.")
            return job_posting_links

    # Process each search result
    for result_item in results:
        try:
            result_json = json.loads(result_item)
        except json.JSONDecodeError:
            logger.debug("Skipping invalid JSON result.")
            continue

        link = result_json.get('link', '')
        if not link:
            logger.debug("No link in result; skipping.")
            continue

        if "linkedin.com/jobs/view/" not in link:
            logger.debug("Link is not a LinkedIn job posting; skipping.")
            continue

        # Normalize the LinkedIn domain to www.linkedin.com
        parsed = urlparse(link)
        new_link = parsed._replace(netloc="www.linkedin.com").geturl()
        link = new_link

        # Use Proxycurl to enrich job info
        logger.debug(f"Fetching job info from Proxycurl for link: {link}")
        json_result = await enrich_job_info_from_proxycurl(link, tool_config=tool_config)
        if not json_result or 'error' in json_result:
            logger.debug("No valid job info returned; skipping.")
            continue

        text = json.dumps(json_result).lower()

        # If the user gave an organization_linkedin_url, check if it matches
        company_match = False
        if organization_linkedin_url and json_result.get('company', {}):
            result_url = json_result.get('company', {}).get('url', '').lower()
            result_path = urlparse(result_url).path
            company_path = urlparse(organization_linkedin_url.lower()).path
            company_match = (result_path == company_path)
        else:
            company_match = False

        keywords_found = any(kw in text for kw in keywords_list)

        # If company matches and keywords are found, add to results
        if company_match and keywords_found:
            job_posting_links.append(link)

    logger.info(f"Found {len(job_posting_links)} matching job postings.")
    return job_posting_links
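For illustration, the primary query built from hypothetical inputs (required keywords are quoted, optional ones are not), and the corresponding call:

    async def demo_postings(tool_config):
        # With these inputs the primary query becomes:
        #   site:*linkedin.com/jobs/view/ "Acme Corp" "python" remote
        return await find_matching_job_posting_proxy_curl(
            company_name="Acme Corp",    # illustrative
            keywords_check=["python"],
            optional_keywords=["remote"],
            organization_linkedin_url="https://www.linkedin.com/company/acme/",
            tool_config=tool_config,
        )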
def fill_in_missing_properties(input_user_properties: dict, person_data: dict) -> dict:
    """
    If input_user_properties has a non-empty value for a field, keep it.
    Otherwise, use that field from person_data. Note that the name, title,
    and headline fields below are refreshed from person_data whenever present.
    """

    def is_empty(value):
        # Checks for None, empty string, or string with only whitespace
        return value is None or (isinstance(value, str) and not value.strip())

    # Email - use first personal email if input is empty
    if is_empty(input_user_properties.get("email")):
        personal_emails = person_data.get("personal_emails")
        if isinstance(personal_emails, list) and personal_emails:
            input_user_properties["email"] = personal_emails[0]

    # Phone
    if is_empty(input_user_properties.get("phone")):
        input_user_properties["phone"] = person_data.get("contact", {}).get("sanitized_phone", "")

    # Full name
    if person_data.get("full_name"):
        input_user_properties["full_name"] = person_data["full_name"]

    # First name
    if person_data.get("first_name"):
        input_user_properties["first_name"] = person_data["first_name"]

    # Last name
    if person_data.get("last_name"):
        input_user_properties["last_name"] = person_data["last_name"]

    # Occupation -> job_title
    if person_data.get("occupation"):
        input_user_properties["job_title"] = person_data["occupation"]

    # Headline
    if person_data.get("headline"):
        input_user_properties["headline"] = person_data["headline"]

    # Summary
    if is_empty(input_user_properties.get("summary_about_lead")) and person_data.get("summary"):
        input_user_properties["summary_about_lead"] = person_data["summary"]

    # Experiences
    experiences = person_data.get("experiences", [])
    if experiences:
        # Current role data
        input_user_properties["organization_name"] = experiences[0].get("company", "")

        org_url = experiences[0].get("company_linkedin_profile_url", "")
        if org_url and is_empty(input_user_properties.get("organization_linkedin_url")):
            input_user_properties["organization_linkedin_url"] = org_url

        # If there's a second experience, track it as previous
        if len(experiences) > 1:
            previous_org = experiences[1]
            prev_org_url = previous_org.get("company_linkedin_profile_url", "")

            if prev_org_url and is_empty(input_user_properties.get("previous_organization_linkedin_url")):
                input_user_properties["previous_organization_linkedin_url"] = prev_org_url

            if is_empty(input_user_properties.get("previous_organization_name")):
                input_user_properties["previous_organization_name"] = previous_org.get("company", "")

    # Combine city/state if available (and if lead_location is empty); avoid literal "None"
    if is_empty(input_user_properties.get("lead_location")):
        city = person_data.get("city")
        state = person_data.get("state")
        parts = []
        for value in (city, state):
            if value is None:
                continue
            s = str(value).strip()
            if not s or s.lower() == "none":
                continue
            parts.append(s)
        if parts:
            input_user_properties["lead_location"] = ", ".join(parts)

    # LinkedIn Followers Count
    if is_empty(input_user_properties.get("linkedin_follower_count")):
        input_user_properties["linkedin_follower_count"] = person_data.get("follower_count", 0)

    return input_user_properties
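A small worked example of the merge semantics above; email, phone, and location only fill gaps, while name/title/headline are refreshed from person_data:

    props = {"email": "jane@acme.example", "job_title": "Manager"}
    person = {
        "first_name": "Jane", "last_name": "Doe",
        "occupation": "Director of Sales",
        "personal_emails": ["jd@personal.example"],
        "city": "Austin", "state": "TX",
    }
    merged = fill_in_missing_properties(props, person)
    # merged["email"] == "jane@acme.example"      (kept: already non-empty)
    # merged["job_title"] == "Director of Sales"  (overwritten from occupation)
    # merged["lead_location"] == "Austin, TX"     (filled from city/state)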
async def enrich_user_info_with_proxy_curl(input_user_properties: dict, tool_config: Optional[List[Dict]] = None) -> dict:
    """
    Enriches the user info (input_user_properties) with data from Proxycurl.
    If the user_linkedin_url is determined to be a proxy (acw* and length > 10),
    we skip calling enrich_person_info_from_proxycurl, keep the input as-is,
    and only perform the organization enrichment logic.

    Returns:
        dict: Updated input_user_properties with enriched data, or
        with an error field if something goes wrong.
    """
    logger.info("Entering enrich_user_info_with_proxy_curl")

    if not input_user_properties:
        logger.warning("No input_user_properties provided; returning empty dict.")
        return {}

    linkedin_url = input_user_properties.get("user_linkedin_url", "")
    email = input_user_properties.get("email", "")
    user_data_from_proxycurl = None

    logger.debug(f"Attempting to enrich data for LinkedIn URL='{linkedin_url}', Email='{email}'")

    # ---------------------------------------------------------------
    # 1) Detect if the LinkedIn URL is a "proxy" URL (acw + length > 10)
    # ---------------------------------------------------------------
    def is_proxy_linkedin_url(url: str) -> bool:
        """
        Checks if the LinkedIn URL has an /in/<profile_id> path
        that starts with 'acw' and has length > 10, indicating a proxy.
        """
        match = re.search(r"linkedin\.com/in/([^/]+)", url, re.IGNORECASE)
        if match:
            profile_id = match.group(1)
            if profile_id.startswith("acw") and len(profile_id) > 10:
                return True
        return False

    if is_proxy_linkedin_url(linkedin_url):
        logger.info("The LinkedIn URL appears to be a proxy URL. Skipping user data enrichment from Proxycurl.")
        # We do NOT call enrich_person_info_from_proxycurl for user data.
        # We just set linkedin_url_match = False and enrich organization info if possible:
        input_user_properties["linkedin_url_match"] = False

        # Attempt organization enrichment if we have an organization_linkedin_url:
        company_data = {}
        if input_user_properties.get("organization_linkedin_url"):
            company_data = await enrich_organization_info_from_proxycurl(
                organization_linkedin_url=input_user_properties["organization_linkedin_url"],
                tool_config=tool_config
            )
            if company_data and not company_data.get("error"):
                if company_data.get("organization_linkedin_url"):
                    input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
                if company_data.get("organization_name"):
                    input_user_properties["organization_name"] = company_data.get("organization_name", "")
                input_user_properties["organization_size"] = str(
                    company_data.get("company_size_on_linkedin", "")
                )
                input_user_properties["company_size"] = str(
                    company_data.get("company_size_on_linkedin", "")
                )
                input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
                input_user_properties["industry"] = company_data.get("organization_industry", "")
                input_user_properties["organization_revenue"] = ""

        # Always clean & store any returned org info:
        additional_props = input_user_properties.get("additional_properties") or {}
        company_data = cleanup_properties(company_data)
        additional_props["pc_company_data"] = json.dumps(company_data)
        input_user_properties["additional_properties"] = additional_props

        logger.info("Returning after skipping user enrichment for proxy URL.")
        return input_user_properties

    # ----------------------------------------------------------------
    # 2) If not proxy, proceed with normal user enrichment logic
    # ----------------------------------------------------------------
    if linkedin_url or email:
        user_data = await enrich_person_info_from_proxycurl(
            linkedin_url=linkedin_url,
            email=email,
            tool_config=tool_config
        )
        if not user_data or 'error' in user_data:
            logger.warning("No valid person data found by LinkedIn or email.")
        else:
            user_data_from_proxycurl = user_data
            if linkedin_url:
                logger.info(f"User data found for LinkedIn URL: {linkedin_url}")
                input_user_properties["user_linkedin_url"] = linkedin_url
    else:
        # Otherwise, fall back to name-based lookup
        first_name = input_user_properties.get("first_name", "")
        last_name = input_user_properties.get("last_name", "")
        full_name = input_user_properties.get("full_name", "")

        if not first_name or not last_name:
            if full_name:
                name_parts = full_name.split(" ", 1)
                first_name = first_name or name_parts[0]
                if len(name_parts) > 1:
                    last_name = last_name or name_parts[1]

        if not full_name:
            full_name = f"{first_name} {last_name}".strip()

        company = input_user_properties.get("organization_name", "")
        logger.debug(f"Looking up person by name: {first_name} {last_name}, company: {company}")

        if first_name and last_name:
            lookup_result = await lookup_person_in_proxy_curl_by_name(
                first_name=first_name,
                last_name=last_name,
                company_name=company,
                tool_config=tool_config
            )
            # Expecting a dict (search_result)
            if lookup_result and not lookup_result.get('error'):
                results = lookup_result.get("results", [])
                person_company = ""
                for person in results:
                    linkedin_profile_url = person.get("linkedin_profile_url", "")
                    if linkedin_profile_url:
                        data_from_proxycurl = await enrich_person_info_from_proxycurl(
                            linkedin_url=linkedin_profile_url,
                            tool_config=tool_config
                        )
                        if data_from_proxycurl and not data_from_proxycurl.get('error'):
                            person_name = data_from_proxycurl.get("name", "").lower()
                            person_first_name = data_from_proxycurl.get("first_name", "").lower()
                            person_last_name = data_from_proxycurl.get("last_name", "").lower()
                            experiences = data_from_proxycurl.get('experiences', [])
                            for exp in experiences:
                                exp_company = exp.get("company", "").lower()
                                if exp_company == company.lower():
                                    person_company = exp_company
                                    break

                            if (
                                (person_name == full_name.lower() or
                                 (person_first_name == first_name.lower() and person_last_name == last_name.lower()))
                                and (not company or person_company == company.lower())
                            ):
                                logger.info(f"User data found for name: {full_name}")
                                input_user_properties["user_linkedin_url"] = linkedin_profile_url
                                user_data_from_proxycurl = data_from_proxycurl
                                break

    if not user_data_from_proxycurl:
        logger.debug("No user data returned from Proxycurl.")
        input_user_properties["linkedin_url_match"] = False
        return input_user_properties

    # ------------------------------------------------------------------
    # 3) If user data was found, sanitize & fill user properties
    # ------------------------------------------------------------------
    url_pattern = re.compile(r'(https?://[^\s]+)', re.IGNORECASE)

    def sanitize_urls_in_data(data):
        """
        Recursively walk through 'data' and remove any URL that is not under the linkedin.com domain.
        """
        if isinstance(data, dict):
            sanitized = {}
            for k, v in data.items():
                sanitized[k] = sanitize_urls_in_data(v)
            return sanitized
        elif isinstance(data, list):
            return [sanitize_urls_in_data(item) for item in data]
        elif isinstance(data, str):
            def replace_non_linkedin(match):
                link = match.group(1)
                if "linkedin.com" not in (urlparse(link).netloc or ""):
                    return ""
                return link
            return re.sub(url_pattern, replace_non_linkedin, data)
        return data

    person_data = sanitize_urls_in_data(user_data_from_proxycurl)
    additional_props = input_user_properties.get("additional_properties") or {}

    # Check if there's a match on first/last name
    first_matched = (
        input_user_properties.get("first_name")
        and person_data.get("first_name") == input_user_properties["first_name"]
    )
    last_matched = (
        input_user_properties.get("last_name")
        and person_data.get("last_name") == input_user_properties["last_name"]
    )

    if first_matched and last_matched:
        input_user_properties["linkedin_url_match"] = True
        input_user_properties["linkedin_validation_status"] = "valid"

    input_user_properties = fill_in_missing_properties(input_user_properties, person_data)

    # ------------------------------------------------------------------
    # 4) Attempt organization enrichment if we have an org LinkedIn URL
    # ------------------------------------------------------------------
    company_data = {}
    if input_user_properties.get("organization_linkedin_url"):
        company_data = await enrich_organization_info_from_proxycurl(
            organization_linkedin_url=input_user_properties["organization_linkedin_url"],
            tool_config=tool_config
        )
        if company_data and not company_data.get("error"):
            if company_data.get("organization_linkedin_url"):
                input_user_properties["organization_linkedin_url"] = company_data.get("organization_linkedin_url", "")
            if company_data.get("organization_name"):
                input_user_properties["organization_name"] = company_data.get("organization_name", "")
            input_user_properties["organization_size"] = str(
                company_data.get("company_size_on_linkedin", "")
            )
            input_user_properties["company_size"] = str(
                company_data.get("company_size_on_linkedin", "")
            )
            input_user_properties["company_size_list"] = company_data.get("company_size", "")
            input_user_properties["organization_industry"] = company_data.get("organization_industry", "")
            input_user_properties["industry"] = company_data.get("organization_industry", "")
            input_user_properties["organization_revenue"] = ""

    person_data = cleanup_properties(person_data)
    additional_props["pc_person_data"] = json.dumps(person_data)

    company_data = cleanup_properties(company_data)
    additional_props["pc_company_data"] = json.dumps(company_data)
    input_user_properties["additional_properties"] = additional_props

    logger.info("Enrichment of user info with Proxycurl complete.")
    return input_user_properties
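An end-to-end sketch of the enrichment flow above; all lead values are illustrative, and leaving user_linkedin_url and email empty forces the name-based lookup path:

    async def demo_enrich(tool_config):
        lead = {
            "first_name": "Jane", "last_name": "Doe",  # illustrative lead
            "organization_name": "Acme Corp",
            "user_linkedin_url": "",
        }
        enriched = await enrich_user_info_with_proxy_curl(lead, tool_config=tool_config)
        return enriched.get("user_linkedin_url"), enriched.get("linkedin_url_match")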
@assistant_tool
async def find_leads_by_job_openings_proxy_curl(
    query_params: Dict[str, Any],
    hiring_manager_roles: List[str],
    tool_config: Optional[List[Dict]] = None,
) -> List[Dict]:
    """Search LinkedIn job postings using Proxycurl and find hiring manager leads.

    Args:
        query_params: Dictionary of parameters for the Proxycurl job search API. The
            key ``job_title`` is required. Other keys like ``location`` may also
            be supplied.
        hiring_manager_roles: List of job titles to look up at the company for
            potential hiring managers.
        tool_config: Optional configuration containing Proxycurl credentials.

    Returns:
        A list of lead dictionaries with normalized keys such as
        ``first_name``, ``last_name``, ``user_linkedin_url``,
        ``organization_name``, and ``organization_linkedin_url``.
    """
    logger.info("Entering find_leads_by_job_openings_proxy_curl")

    if not isinstance(query_params, dict) or not query_params.get("job_title"):
        logger.warning("query_params must include 'job_title'")
        return []

    try:
        API_KEY = get_proxycurl_access_token(tool_config)
    except ValueError as e:
        logger.error(str(e))
        return []

    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }

    job_search_url = "https://enrichlayer.com/api/v2/company/job"
    leads: List[Dict] = []

    # ------------------------------------------------------------------
    # 1) Look up job openings
    # ------------------------------------------------------------------
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(job_search_url, headers=headers, params=query_params) as resp:
                if resp.status == 200:
                    job_result = await resp.json()
                    jobs = job_result.get("results") or job_result.get("jobs") or []
                elif resp.status == 429:
                    logger.warning("Rate limit exceeded on job search")
                    await asyncio.sleep(30)
                    return []
                else:
                    error_text = await resp.text()
                    logger.error("Job search error %s: %s", resp.status, error_text)
                    return []
    except Exception:
        logger.exception("Exception while searching jobs on Proxycurl")
        return []

    # ------------------------------------------------------------------
    # 2) For each job, find leads for the specified hiring manager roles
    # ------------------------------------------------------------------
    for job in jobs:
        company = job.get("company", {}) if isinstance(job, dict) else {}
        company_name = company.get("name", "")
        company_url = company.get("url", "")
        if not company_name:
            continue

        for role in hiring_manager_roles:
            employee_params = {
                "url": company_url,
                "role_search": role,
                "employment_status": "current",
                "page_size": 1,
            }
            employees = []
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
                        "https://enrichlayer.com/api/v2/company/employees",
                        headers=headers,
                        params=employee_params,
                    ) as e_resp:
                        if e_resp.status == 200:
                            data = await e_resp.json()
                            employees = data.get("employees") or data.get("profiles") or []
                        elif e_resp.status == 429:
                            logger.warning("Rate limit exceeded while fetching employees")
                            await asyncio.sleep(30)
                            continue
            except Exception:
                logger.exception("Exception while fetching employees from Proxycurl")
                continue

            for emp in employees:
                profile_url = emp.get("linkedin_profile_url") or emp.get("profile_url")
                if not profile_url:
                    continue
                person = await enrich_person_info_from_proxycurl(
                    linkedin_url=profile_url, tool_config=tool_config
                )
                if not person or person.get("error"):
                    continue
                lead = {
                    "first_name": person.get("first_name", ""),
                    "last_name": person.get("last_name", ""),
                    "full_name": person.get("full_name", ""),
                    "user_linkedin_url": profile_url,
                    "job_title": person.get("occupation", role),
                    "organization_name": company_name,
                    "organization_linkedin_url": company_url,
                }
                cleaned = cleanup_properties(lead)
                if cleaned:
                    leads.append(cleaned)

    logger.info("Returning %d leads from Proxycurl job search", len(leads))
    return leads