dhisana 0.0.1.dev243__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/__init__.py +1 -0
- dhisana/cli/__init__.py +1 -0
- dhisana/cli/cli.py +20 -0
- dhisana/cli/datasets.py +27 -0
- dhisana/cli/models.py +26 -0
- dhisana/cli/predictions.py +20 -0
- dhisana/schemas/__init__.py +1 -0
- dhisana/schemas/common.py +399 -0
- dhisana/schemas/sales.py +965 -0
- dhisana/ui/__init__.py +1 -0
- dhisana/ui/components.py +472 -0
- dhisana/utils/__init__.py +1 -0
- dhisana/utils/add_mapping.py +352 -0
- dhisana/utils/agent_tools.py +51 -0
- dhisana/utils/apollo_tools.py +1597 -0
- dhisana/utils/assistant_tool_tag.py +4 -0
- dhisana/utils/built_with_api_tools.py +282 -0
- dhisana/utils/cache_output_tools.py +98 -0
- dhisana/utils/cache_output_tools_local.py +78 -0
- dhisana/utils/check_email_validity_tools.py +717 -0
- dhisana/utils/check_for_intent_signal.py +107 -0
- dhisana/utils/check_linkedin_url_validity.py +209 -0
- dhisana/utils/clay_tools.py +43 -0
- dhisana/utils/clean_properties.py +135 -0
- dhisana/utils/company_utils.py +60 -0
- dhisana/utils/compose_salesnav_query.py +259 -0
- dhisana/utils/compose_search_query.py +759 -0
- dhisana/utils/compose_three_step_workflow.py +234 -0
- dhisana/utils/composite_tools.py +137 -0
- dhisana/utils/dataframe_tools.py +237 -0
- dhisana/utils/domain_parser.py +45 -0
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_parse_helpers.py +132 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +933 -0
- dhisana/utils/extract_email_content_for_llm.py +101 -0
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +426 -0
- dhisana/utils/g2_tools.py +104 -0
- dhisana/utils/generate_content.py +41 -0
- dhisana/utils/generate_custom_message.py +271 -0
- dhisana/utils/generate_email.py +278 -0
- dhisana/utils/generate_email_response.py +465 -0
- dhisana/utils/generate_flow.py +102 -0
- dhisana/utils/generate_leads_salesnav.py +303 -0
- dhisana/utils/generate_linkedin_connect_message.py +224 -0
- dhisana/utils/generate_linkedin_response_message.py +317 -0
- dhisana/utils/generate_structured_output_internal.py +462 -0
- dhisana/utils/google_custom_search.py +267 -0
- dhisana/utils/google_oauth_tools.py +727 -0
- dhisana/utils/google_workspace_tools.py +1294 -0
- dhisana/utils/hubspot_clearbit.py +96 -0
- dhisana/utils/hubspot_crm_tools.py +2440 -0
- dhisana/utils/instantly_tools.py +149 -0
- dhisana/utils/linkedin_crawler.py +168 -0
- dhisana/utils/lusha_tools.py +333 -0
- dhisana/utils/mailgun_tools.py +156 -0
- dhisana/utils/mailreach_tools.py +123 -0
- dhisana/utils/microsoft365_tools.py +455 -0
- dhisana/utils/openai_assistant_and_file_utils.py +267 -0
- dhisana/utils/openai_helpers.py +977 -0
- dhisana/utils/openapi_spec_to_tools.py +45 -0
- dhisana/utils/openapi_tool/__init__.py +1 -0
- dhisana/utils/openapi_tool/api_models.py +633 -0
- dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +271 -0
- dhisana/utils/openapi_tool/openapi_tool.py +319 -0
- dhisana/utils/parse_linkedin_messages_txt.py +100 -0
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +1226 -0
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/python_function_to_tools.py +83 -0
- dhisana/utils/research_lead.py +176 -0
- dhisana/utils/sales_navigator_crawler.py +1103 -0
- dhisana/utils/salesforce_crm_tools.py +477 -0
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +162 -0
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +852 -0
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +582 -0
- dhisana/utils/test_connect.py +2087 -0
- dhisana/utils/trasform_json.py +173 -0
- dhisana/utils/web_download_parse_tools.py +189 -0
- dhisana/utils/workflow_code_model.py +5 -0
- dhisana/utils/zoominfo_tools.py +357 -0
- dhisana/workflow/__init__.py +1 -0
- dhisana/workflow/agent.py +18 -0
- dhisana/workflow/flow.py +44 -0
- dhisana/workflow/task.py +43 -0
- dhisana/workflow/test.py +90 -0
- dhisana-0.0.1.dev243.dist-info/METADATA +43 -0
- dhisana-0.0.1.dev243.dist-info/RECORD +102 -0
- dhisana-0.0.1.dev243.dist-info/WHEEL +5 -0
- dhisana-0.0.1.dev243.dist-info/entry_points.txt +2 -0
- dhisana-0.0.1.dev243.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/dhisana/utils/instantly_tools.py
@@ -0,0 +1,149 @@
+import os
+import aiohttp
+import logging
+from typing import List, Dict, Any
+from dhisana.utils.assistant_tool_tag import assistant_tool
+
+logging.basicConfig(level=logging.INFO)
+base_url = 'https://api.instantly.ai/v1'
+
+# Manage Instantly leads for campaigns.
+
+def get_api_key_and_headers() -> Dict[str, str]:
+    api_key = os.environ.get('INSTANTLY_API_KEY')
+    if not api_key:
+        raise ValueError(
+            "Instantly integration is not configured. Please configure the connection to Instantly in Integrations."
+        )
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+    return headers
+
+async def _handle_response(response: aiohttp.ClientResponse) -> Any:
+    if response.status == 200:
+        return await response.json()
+    elif response.status == 429:
+        raise aiohttp.ClientResponseError(
+            request_info=response.request_info,
+            history=response.history,
+            status=response.status,
+            message="Rate limit exceeded",
+            headers=response.headers
+        )
+    else:
+        error_message = await response.text()
+        logging.error(f"Error {response.status}: {error_message}")
+        response.raise_for_status()
+
+@assistant_tool
+async def add_leads_to_campaign(campaign_id: str, leads: List[Dict[str, str]]) -> Any:
+    """
+    Add leads to a campaign.
+
+    Args:
+        campaign_id (str): The ID of the campaign.
+        leads (List[Dict[str, str]]): A list of leads to add, where each lead is represented as a dictionary.
+
+    Returns:
+        Any: The response from the API.
+    """
+    url = f"{base_url}/lead/add"
+    payload = {
+        "campaign_id": campaign_id,
+        "leads": leads
+    }
+    headers = get_api_key_and_headers()
+    async with aiohttp.ClientSession() as session:
+        async with session.post(url, json=payload, headers=headers) as response:
+            return await _handle_response(response)
+
+@assistant_tool
+async def delete_leads_from_campaign(campaign_id: str, lead_emails: List[str]) -> Any:
+    """
+    Delete leads from a campaign.
+
+    Args:
+        campaign_id (str): The ID of the campaign.
+        lead_emails (List[str]): A list of lead emails to delete.
+
+    Returns:
+        Any: The response from the API.
+    """
+    url = f"{base_url}/lead/delete"
+    payload = {
+        "campaign_id": campaign_id,
+        "leads": lead_emails
+    }
+    headers = get_api_key_and_headers()
+    async with aiohttp.ClientSession() as session:
+        async with session.post(url, json=payload, headers=headers) as response:
+            return await _handle_response(response)
+
+@assistant_tool
+async def update_lead_variables(lead_email: str, variables: Dict[str, str]) -> Any:
+    """
+    Update variables for a lead.
+
+    Args:
+        lead_email (str): The email of the lead.
+        variables (Dict[str, str]): A dictionary of variables to update.
+
+    Returns:
+        Any: The response from the API.
+    """
+    url = f"{base_url}/lead/variable/update"
+    payload = {
+        "lead": lead_email,
+        "variables": variables
+    }
+    headers = get_api_key_and_headers()
+    async with aiohttp.ClientSession() as session:
+        async with session.post(url, json=payload, headers=headers) as response:
+            return await _handle_response(response)
+
+@assistant_tool
+async def set_lead_variables(lead_email: str, variables: Dict[str, str]) -> Any:
+    """
+    Set variables for a lead.
+
+    Args:
+        lead_email (str): The email of the lead.
+        variables (Dict[str, str]): A dictionary of variables to set.
+
+    Returns:
+        Any: The response from the API.
+    """
+    url = f"{base_url}/lead/variable/set"
+    payload = {
+        "lead": lead_email,
+        "variables": variables
+    }
+    headers = get_api_key_and_headers()
+    async with aiohttp.ClientSession() as session:
+        async with session.post(url, json=payload, headers=headers) as response:
+            return await _handle_response(response)
+
+@assistant_tool
+async def is_lead_in_campaign(campaign_id: str, lead_email: str) -> bool:
+    """
+    Check if a lead is in a campaign.
+
+    Args:
+        campaign_id (str): The ID of the campaign.
+        lead_email (str): The email of the lead.
+
+    Returns:
+        bool: True if the lead is in the campaign, False otherwise.
+    """
+    url = f"{base_url}/campaign/leads"
+    params = {
+        "campaign_id": campaign_id
+    }
+    headers = get_api_key_and_headers()
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url, headers=headers, params=params) as response:
+            data = await _handle_response(response)
+            leads = data.get("leads", [])
+            # Use .get() so a lead record without an "email" key does not raise.
+            return any(lead.get("email") == lead_email for lead in leads)
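A minimal driver for these helpers might look like the sketch below (not part of the package). It assumes INSTANTLY_API_KEY is exported, that the @assistant_tool decorator leaves the coroutine directly callable, and uses placeholder campaign and lead values; the lead fields Instantly actually accepts are defined by its API, not by this module.

import asyncio
from dhisana.utils.instantly_tools import add_leads_to_campaign, is_lead_in_campaign

async def main():
    campaign_id = "your-campaign-id"  # placeholder
    leads = [{"email": "jane.doe@example.com", "first_name": "Jane"}]  # placeholder payload
    print(await add_leads_to_campaign(campaign_id, leads))
    print(await is_lead_in_campaign(campaign_id, "jane.doe@example.com"))

asyncio.run(main())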
--- /dev/null
+++ b/dhisana/utils/linkedin_crawler.py
@@ -0,0 +1,168 @@
+# Log in and crawl LinkedIn for relevant information in the background.
+import asyncio
+import os
+import sys
+import logging
+from typing import List, Optional
+from pydantic import BaseModel
+from playwright.async_api import async_playwright
+import pandas as pd
+
+from dhisana.utils.dataframe_tools import get_structured_output
+from dhisana.utils.web_download_parse_tools import parse_html_content_as_text
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Pydantic models for structured data
+class LinkedInUserProfile(BaseModel):
+    first_name: Optional[str] = None
+    last_name: Optional[str] = None
+    headline: Optional[str] = None
+    location: Optional[str] = None
+    num_of_connections: Optional[int] = None
+    num_of_followers: Optional[int] = None
+    summary: Optional[str] = None
+    experience: Optional[List[str]] = None
+    education: Optional[List[str]] = None
+    skills: Optional[List[str]] = None
+    recommendations: Optional[List[str]] = None
+    accomplishments: Optional[List[str]] = None
+    interests: Optional[List[str]] = None
+    profile_url: Optional[str] = None
+
+class SalesNavigatorInsights(BaseModel):
+    sales_navigator_insight: Optional[str] = None
+    key_signals: Optional[str] = None
+    common_connection_paths: Optional[List[str]] = None
+
+
+async def get_html_content_from_url_internal(page, url):
+    """
+    Navigate to a URL using Playwright and retrieve the page content.
+    """
+    logging.info(f"Requesting {url}")
+    try:
+        await page.goto(url, timeout=10000)
+        html_content = await page.content()
+        return parse_html_content_as_text(html_content)
+    except Exception as e:
+        logging.info(f"Failed to fetch {url}: {e}")
+        return ""
+
+
+async def login_to_linkedin(page, email, password, headless):
+    """
+    Log into LinkedIn using the provided email and password.
+    """
+    await page.goto("https://www.linkedin.com/uas/login")
+    await page.wait_for_load_state('load')
+
+    await page.get_by_label("Email or Phone").click()
+    await page.get_by_label("Email or Phone").fill(email)
+    await page.get_by_label("Password").click()
+    await page.get_by_label("Password").fill(password)
+    await page.locator("#organic-div form").get_by_role("button", name="Sign in", exact=True).click()
+    await page.wait_for_load_state('load')
+
+    if "checkpoint/challenge" in page.url:
+        if not headless:
+            logger.warning("Captcha page encountered! Human intervention is needed.")
+            max_iterations = 25
+            for attempt in range(max_iterations):
+                await asyncio.sleep(3)  # Wait for 3 seconds before checking again
+                await page.wait_for_load_state('load')  # Ensure the page is loaded
+                if "checkpoint/challenge" not in page.url:
+                    logger.info("Captcha solved. Continuing with the process.")
+                    break
+            else:
+                # for/else: runs only when the loop finished without a break
+                logger.error(f"Captcha not solved after {max_iterations} attempts. Exiting.")
+                sys.exit(1)
+            await asyncio.sleep(3)
+        else:
+            logger.error("Captcha page encountered! Aborting due to headless mode.")
+            sys.exit(1)
+
+async def extract_from_page(page, url, response_type):
+    """
+    Extract structured data from a web page using OpenAI's API.
+    """
+    # Get page HTML content
+    content_text = await get_html_content_from_url_internal(page, url)
+    if not content_text:
+        return None, 'FAIL'
+
+    # Get structured content using OpenAI's API
+    extract_content, status = await get_structured_output(content_text, response_type)
+    return extract_content, status
+
+async def extract_user_content_from_linkedin(linkedin_id: str, output_csv_path: str):
+    """
+    Main function to orchestrate scraping and data extraction.
+    """
+    email = os.environ.get("LINKEDIN_EMAIL")
+    password = os.environ.get("LINKEDIN_PASSWORD")
+
+    if not email or not password:
+        logger.error("LinkedIn credentials not found in environment variables.")
+        return {"status": "FAIL", "message": "LinkedIn credentials not found in environment variables."}
+
+    # Start the browser using Playwright
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=False)
+        context = await browser.new_context()
+        page = await context.new_page()
+
+        # Login to LinkedIn
+        await login_to_linkedin(page, email, password, False)
+
+        # List of LinkedIn profiles to scrape
+        job_profiles = [
+            f"https://www.linkedin.com/in/{linkedin_id}/",
+            # Add more profile URLs as needed
+        ]
+
+        # Extract data from profiles
+        outputs = []
+        for profile in job_profiles:
+            output, status = await extract_from_page(page, profile, LinkedInUserProfile)
+            if status == 'SUCCESS':
+                outputs.append(output)
+            else:
+                outputs.append({})
+                logger.error(f"Failed to extract data from {profile}")
+
+        # Create a DataFrame from the outputs and save to CSV
+        df = pd.DataFrame(outputs)
+        csv_file_path = '/tmp/profile_data.csv'
+        df.to_csv(csv_file_path, index=False)
+        logger.info(f"Saved profile data to {csv_file_path}")
+
+        # List of Sales Navigator insights URLs to scrape
+        sales_navigator_insights = [
+            f"https://www.linkedin.com/in/{linkedin_id}/details/sales-lead-insights-details/",
+            # Add more URLs as needed
+        ]
+
+        # Extract data from Sales Navigator insights
+        insights_outputs = []
+        for profile in sales_navigator_insights:
+            output, status = await extract_from_page(page, profile, SalesNavigatorInsights)
+            if status == 'SUCCESS':
+                insights_outputs.append(output)
+            else:
+                insights_outputs.append({})
+                logger.error(f"Failed to extract data from {profile}")
+
+        # Create a DataFrame from the outputs and save to CSV
+        df_insights = pd.DataFrame(insights_outputs)
+        insights_csv_file_path = output_csv_path
+        df_insights.to_csv(insights_csv_file_path, index=False)
+        logger.info(f"Saved Sales Navigator insights to {insights_csv_file_path}")
+
+        # Close the browser
+        await browser.close()
+        return {"status": "SUCCESS", "message": f"Data extraction completed successfully to {insights_csv_file_path}."}
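To run this crawler end to end, an invocation sketch follows (not part of the package). It assumes LINKEDIN_EMAIL and LINKEDIN_PASSWORD are set and that Playwright's Chromium has been installed (python -m playwright install chromium); the profile id and output path are placeholders. Note the module launches a visible (non-headless) browser so a human can clear any captcha challenge.

import asyncio
from dhisana.utils.linkedin_crawler import extract_user_content_from_linkedin

# "some-public-id" and the CSV path are placeholders.
result = asyncio.run(
    extract_user_content_from_linkedin("some-public-id", "/tmp/insights.csv")
)
print(result["status"], result["message"])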
--- /dev/null
+++ b/dhisana/utils/lusha_tools.py
@@ -0,0 +1,333 @@
+import os
+import json
+import logging
+from typing import Dict, List, Optional
+
+import aiohttp
+import backoff
+
+from dhisana.utils.cache_output_tools import cache_output, retrieve_output
+from dhisana.utils.assistant_tool_tag import assistant_tool
+
+
+def get_lusha_credentials_from_config(
+    tool_config: Optional[List[Dict]] = None
+) -> Optional[str]:
+    """
+    Retrieve the Lusha API key from the tool_config (looking for 'name' == 'lusha'),
+    or fall back to environment variables if not found.
+
+    Args:
+        tool_config (List[Dict], optional):
+            Configuration list that may contain Lusha credentials.
+
+    Returns:
+        str: Lusha API key from tool_config or environment variables.
+    """
+    lusha_api_key = None
+
+    if tool_config:
+        lusha_config = next(
+            (item for item in tool_config if item.get("name") == "lusha"),
+            None
+        )
+        if lusha_config:
+            # Convert the list of dicts under 'configuration' to a map {name: value}
+            config_map = {
+                cfg["name"]: cfg["value"]
+                for cfg in lusha_config.get("configuration", [])
+                if cfg
+            }
+            lusha_api_key = config_map.get("apiKey")
+            config_map.get("apiSecret")  # NOTE: result unused; the API key alone authenticates requests.
+
+    # Fallback to environment variables if not found in tool_config
+    lusha_api_key = lusha_api_key or os.environ.get("LUSHA_API_KEY")
+    if not lusha_api_key:
+        raise ValueError(
+            "Lusha integration is not configured. Please configure the connection to Lusha in Integrations."
+        )
+    return lusha_api_key
+
+
+@assistant_tool
+@backoff.on_exception(
+    backoff.expo,
+    (aiohttp.ClientResponseError, Exception),
+    max_tries=3,
+    giveup=lambda e: not (isinstance(e, aiohttp.ClientResponseError) and e.status == 429),
+    factor=2,
+)
+async def enrich_person_info_from_lusha(
+    linkedin_url: Optional[str] = None,
+    email: Optional[str] = None,
+    phone: Optional[str] = None,
+    tool_config: Optional[List[Dict]] = None
+) -> dict:
+    """
+    Fetch a person's details from Lusha using LinkedIn URL, email, or phone number.
+
+    Args:
+        linkedin_url (str, optional): LinkedIn profile URL of the person.
+        email (str, optional): Email address of the person.
+        phone (str, optional): Phone number of the person.
+        tool_config (List[Dict], optional): Configuration list that may contain Lusha credentials.
+
+    Returns:
+        dict: JSON response containing person information, or an error message.
+    """
+    try:
+        access_token = get_lusha_credentials_from_config(tool_config)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    if not linkedin_url and not email and not phone:
+        return {"error": "At least one of linkedin_url, email, or phone must be provided"}
+
+    # Adjust these details according to Lusha's actual enrichment endpoint and request format
+    url = "https://api.lusha.com/enrich/v1/person"
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json"
+    }
+
+    data: Dict[str, str] = {}
+    cache_key_value = None
+
+    if linkedin_url:
+        data["linkedin_url"] = linkedin_url
+        cache_key_value = linkedin_url
+    if email:
+        data["email"] = email
+    if phone:
+        data["phone"] = phone
+
+    if cache_key_value:
+        cached_response = retrieve_output("enrich_person_info_from_lusha", cache_key_value)
+        if cached_response is not None:
+            return cached_response
+
+    async with aiohttp.ClientSession() as session:
+        async with session.post(url, headers=headers, json=data) as response:
+            if response.status == 200:
+                json_result = await response.json()
+                if cache_key_value:
+                    cache_output("enrich_person_info_from_lusha", cache_key_value, json_result)
+                return json_result
+            elif response.status == 429:
+                logging.warning("enrich_person_info_from_lusha rate limit hit")
+                raise aiohttp.ClientResponseError(
+                    request_info=response.request_info,
+                    history=response.history,
+                    status=response.status,
+                    message="Rate limit exceeded",
+                    headers=response.headers
+                )
+            else:
+                error_result = await response.json()
+                logging.warning(
+                    f"enrich_person_info_from_lusha failed with status {response.status}: {error_result}"
+                )
+                return {"error": error_result}
+
+
+@assistant_tool
+@backoff.on_exception(
+    backoff.expo,
+    (aiohttp.ClientResponseError, Exception),
+    max_tries=3,
+    giveup=lambda e: not (isinstance(e, aiohttp.ClientResponseError) and e.status == 429),
+    factor=2,
+)
+async def enrich_organization_info_from_lusha(
+    organization_domain: Optional[str] = None,
+    tool_config: Optional[List[Dict]] = None
+) -> dict:
+    """
+    Fetch an organization's details from Lusha using the organization domain.
+
+    Args:
+        organization_domain (str, optional): Domain of the organization.
+        tool_config (List[Dict], optional): Configuration list that may contain Lusha credentials.
+
+    Returns:
+        dict: JSON response containing organization information, or an error message.
+    """
+    access_token = get_lusha_credentials_from_config(tool_config)
+    if not access_token:
+        return {"error": "Failed to obtain Lusha access token"}
+
+    if not organization_domain:
+        return {"error": "Organization domain must be provided"}
+
+    # Adjust these details according to Lusha's actual company enrichment endpoint
+    url = "https://api.lusha.com/enrich/v1/company"
+    headers = {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json"
+    }
+
+    cached_response = retrieve_output("enrich_organization_info_from_lusha", organization_domain)
+    if cached_response is not None:
+        return cached_response
+
+    data = {"domain": organization_domain}
+
+    async with aiohttp.ClientSession() as session:
+        async with session.post(url, headers=headers, json=data) as response:
+            if response.status == 200:
+                json_result = await response.json()
+                cache_output("enrich_organization_info_from_lusha", organization_domain, json_result)
+                return json_result
+            elif response.status == 429:
+                logging.warning("enrich_organization_info_from_lusha rate limit hit")
+                raise aiohttp.ClientResponseError(
+                    request_info=response.request_info,
+                    history=response.history,
+                    status=response.status,
+                    message="Rate limit exceeded",
+                    headers=response.headers
+                )
+            else:
+                error_result = await response.json()
+                logging.warning(
+                    f"enrich_organization_info_from_lusha failed with status {response.status}: {error_result}"
+                )
+                return {"error": error_result}
+
+
+async def enrich_user_info_with_lusha(
+    input_user_properties: dict,
+    tool_config: Optional[List[Dict]] = None
+) -> dict:
+    """
+    Update user info using Lusha data. Checks the LinkedIn URL, fetches data, and updates
+    the user's properties accordingly.
+
+    Args:
+        input_user_properties (dict): Existing properties about the user.
+        tool_config (List[Dict], optional): Configuration list that may contain Lusha credentials.
+
+    Returns:
+        dict: Updated user properties dictionary with Lusha data.
+    """
+    linkedin_url = input_user_properties.get("user_linkedin_url", "")
+    if not linkedin_url:
+        input_user_properties["linkedin_url_match"] = False
+        return input_user_properties
+
+    # Fetch person data from Lusha
+    lusha_data = await enrich_person_info_from_lusha(
+        linkedin_url=linkedin_url,
+        tool_config=tool_config
+    )
+    if not lusha_data:
+        input_user_properties["linkedin_url_match"] = False
+        return input_user_properties
+
+    person_data = lusha_data.get("person", {})
+    additional_props = input_user_properties.get("additional_properties") or {}
+    additional_props["lusha_person_data"] = json.dumps(person_data)
+    input_user_properties["additional_properties"] = additional_props
+
+    # Fill missing contact info
+    if not input_user_properties.get("email"):
+        input_user_properties["email"] = person_data.get("email", "")
+    if not input_user_properties.get("phone"):
+        input_user_properties["phone"] = person_data.get("phone", "")
+
+    # Map some fields
+    if person_data.get("name"):
+        input_user_properties["full_name"] = person_data["name"]
+    if person_data.get("first_name"):
+        input_user_properties["first_name"] = person_data["first_name"]
+    if person_data.get("last_name"):
+        input_user_properties["last_name"] = person_data["last_name"]
+    if person_data.get("linkedin_url"):
+        input_user_properties["user_linkedin_url"] = person_data["linkedin_url"]
+    if person_data.get("company") and person_data["company"].get("domain"):
+        input_user_properties["primary_domain_of_organization"] = person_data["company"]["domain"]
+    if person_data.get("title"):
+        input_user_properties["job_title"] = person_data["title"]
+    if person_data.get("headline"):
+        input_user_properties["headline"] = person_data["headline"]
+    if person_data.get("company") and person_data["company"].get("name"):
+        input_user_properties["organization_name"] = person_data["company"]["name"]
+    if person_data.get("company") and person_data["company"].get("website"):
+        input_user_properties["organization_website"] = person_data["company"]["website"]
+    if person_data.get("headline") and not input_user_properties.get("summary_about_lead"):
+        input_user_properties["summary_about_lead"] = person_data["headline"]
+
+    # Example: If Lusha provides a list of "keywords" in the company object
+    if person_data.get("company") and person_data["company"].get("keywords"):
+        input_user_properties["keywords"] = ", ".join(person_data["company"]["keywords"])
+
+    # Derive location
+    if person_data.get("city") or person_data.get("state"):
+        input_user_properties["lead_location"] = (
+            f"{person_data.get('city', '')}, {person_data.get('state', '')}".strip(", ")
+        )
+
+    # Check for a match
+    first_matched = bool(
+        input_user_properties.get("first_name")
+        and person_data.get("first_name") == input_user_properties["first_name"]
+    )
+    last_matched = bool(
+        input_user_properties.get("last_name")
+        and person_data.get("last_name") == input_user_properties["last_name"]
+    )
+    if first_matched and last_matched:
+        input_user_properties["linkedin_url_match"] = True
+
+    return input_user_properties
+
+
+@assistant_tool
+@backoff.on_exception(
+    backoff.expo,
+    (aiohttp.ClientResponseError, Exception),
+    max_tries=3,
+    giveup=lambda e: not (isinstance(e, aiohttp.ClientResponseError) and e.status == 429),
+    factor=2,
+)
+async def get_person_info_from_lusha(
+    first_name: str,
+    last_name: str,
+    company_name: str,
+    tool_config: Optional[List[Dict]] = None
+) -> dict:
+    """
+    Calls the Lusha v2 GET endpoint with firstName, lastName, and companyName.
+    """
+    lusha_api_key = get_lusha_credentials_from_config(tool_config)
+    if not lusha_api_key:
+        return {"error": "No Lusha API key found."}
+
+    url = "https://api.lusha.com/v2/person"
+    headers = {"api_key": lusha_api_key}
+    params = {
+        "firstName": first_name,
+        "lastName": last_name,
+        "companyName": company_name
+    }
+
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url, headers=headers, params=params) as response:
+            if response.status == 200:
+                return await response.json()
+            elif response.status == 429:
+                logging.warning("get_person_info_from_lusha rate limit hit")
+                raise aiohttp.ClientResponseError(
+                    request_info=response.request_info,
+                    history=response.history,
+                    status=response.status,
+                    message="Rate limit exceeded",
+                    headers=response.headers
+                )
+            else:
+                error_result = await response.json()
+                logging.warning(
+                    f"get_person_info_from_lusha failed with status {response.status}: {error_result}"
+                )
+                return {"error": error_result}
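For orientation, here is a usage sketch (not part of the package) showing the tool_config shape that get_lusha_credentials_from_config expects; the apiKey value, LinkedIn URL, and names are placeholders.

import asyncio
from dhisana.utils.lusha_tools import enrich_user_info_with_lusha

tool_config = [{
    "name": "lusha",
    "configuration": [{"name": "apiKey", "value": "YOUR_LUSHA_API_KEY"}],  # placeholder key
}]
props = {
    "user_linkedin_url": "https://www.linkedin.com/in/some-public-id/",  # placeholder
    "first_name": "Jane",
    "last_name": "Doe",
}
enriched = asyncio.run(enrich_user_info_with_lusha(props, tool_config=tool_config))
print(enriched.get("linkedin_url_match"), enriched.get("job_title"))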