dhisana 0.0.1.dev13__tar.gz → 0.0.1.dev14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/PKG-INFO +1 -1
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/setup.py +1 -1
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/ui/components.py +29 -7
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/agent_tools.py +4 -1
- dhisana-0.0.1.dev14/src/dhisana/utils/apollo_tools.py +319 -0
- dhisana-0.0.1.dev14/src/dhisana/utils/built_with_api_tools.py +129 -0
- dhisana-0.0.1.dev14/src/dhisana/utils/cache_output.py +70 -0
- dhisana-0.0.1.dev14/src/dhisana/utils/composite_tools.py +88 -0
- dhisana-0.0.1.dev14/src/dhisana/utils/google_custom_search.py +104 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/openai_helpers.py +102 -37
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/openapi_spec_to_tools.py +0 -1
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/python_function_to_tools.py +0 -2
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/serpapi_search_tools.py +4 -5
- dhisana-0.0.1.dev14/src/dhisana/utils/web_download_parse_tools.py +149 -0
- dhisana-0.0.1.dev14/src/dhisana/workflow/agent.py +18 -0
- dhisana-0.0.1.dev14/src/dhisana/workflow/flow.py +44 -0
- dhisana-0.0.1.dev14/src/dhisana/workflow/task.py +43 -0
- dhisana-0.0.1.dev14/src/dhisana/workflow/test.py +90 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana.egg-info/PKG-INFO +1 -1
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana.egg-info/SOURCES.txt +7 -0
- dhisana-0.0.1.dev13/src/dhisana/utils/apollo_tools.py +0 -121
- dhisana-0.0.1.dev13/src/dhisana/utils/web_download_parse_tools.py +0 -46
- dhisana-0.0.1.dev13/src/dhisana/workflow/task.py +0 -60
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/README.md +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/pyproject.toml +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/setup.cfg +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/__init__.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/cli/__init__.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/cli/cli.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/cli/datasets.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/cli/models.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/cli/predictions.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/ui/__init__.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/__init__.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/assistant_tool_tag.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/check_email_validity_tools.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/dataframe_tools.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/google_workspace_tools.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/hubspot_crm_tools.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/linkedin_crawler.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/openapi_tool/__init__.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/openapi_tool/api_models.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/openapi_tool/convert_openai_spec_to_tool.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/openapi_tool/openapi_tool.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/salesforce_crm_tools.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/utils/tools_json.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana/workflow/__init__.py +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana.egg-info/dependency_links.txt +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana.egg-info/entry_points.txt +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana.egg-info/requires.txt +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/src/dhisana.egg-info/top_level.txt +0 -0
- {dhisana-0.0.1.dev13 → dhisana-0.0.1.dev14}/tests/test_agent_tools.py +0 -0
|
@@ -208,24 +208,46 @@ class Upload(Component):
|
|
|
208
208
|
},
|
|
209
209
|
}
|
|
210
210
|
|
|
211
|
-
class Button:
    """Clickable button component that serialises itself to a plain dict."""

    def __init__(
        self,
        label: str = '',
        on_click: str = None,
        button_type: str = 'button',
        condition: str = None,
        confirm: bool = False,
        confirmMessage: str = '',
        style: dict = None,
        icon: str = None,
        disabled: bool = False,
        btn_class: str = 'btn btn-secondary',
    ):
        # Text and appearance.
        self.label = label
        self.icon = icon
        self.btn_class = btn_class
        # A falsy style (None or an empty dict) is replaced by a fresh dict.
        self.style = style if style else {}

        # Behaviour.
        self.on_click = on_click
        self.button_type = button_type
        self.condition = condition
        self.disabled = disabled

        # Confirmation settings.
        self.confirm = confirm
        self.confirmMessage = confirmMessage

    def to_dict(self):
        """Return the component as ``{'type': 'button', 'properties': {...}}``."""
        properties = {
            'label': self.label,
            'onClick': self.on_click,
            'buttonType': self.button_type,
            'condition': self.condition,
            'confirm': self.confirm,
            'confirmMessage': self.confirmMessage,
            'style': self.style,
            'icon': self.icon,
            'disabled': self.disabled,
            'btnClass': self.btn_class,
        }
        return {'type': 'button', 'properties': properties}
|
|
230
252
|
|
|
231
253
|
class Text(Component):
|
|
@@ -45,4 +45,7 @@ if 'GLOBAL_TOOLS_FUNCTIONS' not in globals():
|
|
|
45
45
|
|
|
46
46
|
# Ensure GLOBAL_OPENAI_ASSISTANT_TOOLS is only initialized once
|
|
47
47
|
if 'GLOBAL_OPENAI_ASSISTANT_TOOLS' not in globals():
|
|
48
|
-
GLOBAL_OPENAI_ASSISTANT_TOOLS = []
|
|
48
|
+
GLOBAL_OPENAI_ASSISTANT_TOOLS = []
|
|
49
|
+
|
|
50
|
+
if 'GLOBAL_TOOLS_CACHE_PATH' not in globals():
|
|
51
|
+
GLOBAL_TOOLS_CACHE_PATH = '/tmp/dhisana_ai'
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import aiohttp
|
|
7
|
+
import backoff
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from dhisana.utils.cache_output import cache_output,retrieve_output
|
|
11
|
+
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
12
|
+
|
|
13
|
+
# cache_output and retrieve_output are imported from dhisana.utils.cache_output above.
|
|
14
|
+
# from .cache_utils import cache_output, retrieve_output
|
|
15
|
+
|
|
16
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def enrich_person_info_from_apollo(
    linkedin_url: Optional[str] = None,
    email: Optional[str] = None,
    phone: Optional[str] = None,
):
    """
    Fetch a person's details from Apollo using LinkedIn URL, email, or phone number.

    Results are cached per LinkedIn URL; lookups made only by email or phone
    are never cached.

    Parameters:
    - **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
    - **email** (*str*, optional): Email address of the person.
    - **phone** (*str*, optional): Phone number of the person.

    Returns:
    - **dict**: JSON response containing person information, or
      ``{'error': ...}`` when the API key is missing, no identifier is given,
      or Apollo answers with a non-200, non-429 status.

    Raises:
    - **aiohttp.ClientResponseError**: On HTTP 429, so that the backoff
      decorator can retry the call.
    """
    APOLLO_API_KEY = os.environ.get('APOLLO_API_KEY')
    if not APOLLO_API_KEY:
        return {'error': "Apollo API key not found in environment variables"}

    if not linkedin_url and not email and not phone:
        return {'error': "At least one of linkedin_url, email, or phone must be provided"}

    headers = {
        "X-Api-Key": f"{APOLLO_API_KEY}",
        "Content-Type": "application/json"
    }

    data = {}
    if linkedin_url:
        data['linkedin_url'] = linkedin_url
        # The cache is keyed on the LinkedIn URL only.
        cached_response = retrieve_output("enrich_person_info_from_apollo", linkedin_url)
        if cached_response:
            return cached_response
    if email:
        data['email'] = email
    if phone:
        data['phone_numbers'] = [phone]  # Apollo expects a list for phone numbers

    url = 'https://api.apollo.io/v1/people/match'

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=data) as response:
            if response.status == 200:
                result = await response.json()
                if linkedin_url:
                    cache_output("enrich_person_info_from_apollo", linkedin_url, result)
                return result

            if response.status == 429:
                logging.warning("enrich_person_info_from_apollo Rate limit hit")
                # Cool down before handing control back to the backoff
                # decorator, which adds its own delay before the retry.
                await asyncio.sleep(30)
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message="Rate limit exceeded",
                    headers=response.headers
                )

            # Any other status: report the body as an error. Error pages are
            # not always JSON, so fall back to the raw text instead of raising.
            try:
                result = await response.json()
            except (aiohttp.ClientError, ValueError):
                result = await response.text()
            logging.warning(
                "enrich_person_info_from_apollo request failed with status %s: %s",
                response.status,
                result,
            )
            return {'error': result}
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=30,
)
async def enrich_company_info_from_apollo(
    company_domain: Optional[str] = None,
):
    """
    Fetch a company's details from Apollo using the company domain.

    Successful responses are cached per domain.

    Parameters:
    - **company_domain** (*str*, optional): Domain of the company.

    Returns:
    - **dict**: JSON response containing company information, or
      ``{'error': ...}`` when the API key or domain is missing, or Apollo
      answers with a non-200, non-429 status.

    Raises:
    - **aiohttp.ClientResponseError**: On HTTP 429, so that the backoff
      decorator can retry the call.
    """
    APOLLO_API_KEY = os.environ.get('APOLLO_API_KEY')
    if not APOLLO_API_KEY:
        return {'error': "Apollo API key not found in environment variables"}

    if not company_domain:
        return {'error': "Company domain must be provided"}

    headers = {
        "X-Api-Key": f"{APOLLO_API_KEY}",
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "accept": "application/json"
    }

    cached_response = retrieve_output("enrich_company_info_from_apollo", company_domain)
    if cached_response:
        return cached_response

    url = 'https://api.apollo.io/api/v1/organizations/enrich'
    # Let aiohttp encode the query string so that characters such as '&' or
    # '#' in the caller-supplied domain cannot alter the request.
    params = {'domain': company_domain}

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers, params=params) as response:
            if response.status == 200:
                result = await response.json()
                cache_output("enrich_company_info_from_apollo", company_domain, result)
                return result

            if response.status == 429:
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message="Rate limit exceeded",
                    headers=response.headers
                )

            # Error bodies are not always JSON; fall back to the raw text
            # rather than raising from the error path.
            try:
                result = await response.json()
            except (aiohttp.ClientError, ValueError):
                result = await response.text()
            return {'error': result}
|
|
145
|
+
|
|
146
|
+
@assistant_tool
async def get_enriched_customer_information(
    linkedin_url: Optional[str] = None,
    email: Optional[str] = None,
    phone: Optional[str] = None,
    required_fields: Optional[List[str]] = None,
    data_sources: Optional[List[str]] = None,
):
    """
    Fetch a person's details using LinkedIn URL, email, or phone number.

    The lookup is delegated to ``enrich_person_info_from_apollo``; Apollo is
    the only source queried.

    Parameters:
    - **linkedin_url** (*str*, optional): LinkedIn profile URL of the person.
    - **email** (*str*, optional): Email address of the person.
    - **phone** (*str*, optional): Phone number of the person.
    - **required_fields** (*List[str]*, optional): Properties of the customer to fetch (e.g., 'job_history', 'education_history', 'skills', etc.). Currently accepted but not used.
    - **data_sources** (*List[str]*, optional): Data sources to fetch from (e.g., 'apollo', 'zoominfo', 'websearch', 'linkedin'). Currently accepted but not used.

    Returns:
    - **dict**: JSON response containing person information, exactly as
      returned by ``enrich_person_info_from_apollo``.
    """
    # required_fields and data_sources are kept in the signature for
    # interface compatibility. Nothing below reads them, so no defaults are
    # computed for them.
    return await enrich_person_info_from_apollo(
        linkedin_url=linkedin_url,
        email=email,
        phone=phone,
    )
|
|
191
|
+
|
|
192
|
+
@assistant_tool
async def get_enriched_company_information(
    company_domain: Optional[str] = None,
    required_fields: Optional[List[str]] = None,
    data_sources: Optional[List[str]] = None,
):
    """
    Look up a company by its domain.

    The request is forwarded to ``enrich_company_info_from_apollo``.

    Parameters:
    - **company_domain** (*str*, optional): Domain of the company.
    - **required_fields** (*List[str]*, optional): Properties of the company to fetch (e.g., 'technographics', 'firmographics', 'employee_count', etc.). Not read by this function.
    - **data_sources** (*List[str]*, optional): Data sources to fetch from (e.g., 'apollo', 'zoominfo', 'builtwith', 'linkedin'). Not read by this function.

    Returns:
    - **dict**: JSON response containing company information.
    """
    company_details = await enrich_company_info_from_apollo(
        company_domain=company_domain,
    )
    return company_details
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Retry with exponential backoff, but only when Apollo reports rate limiting.
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=5,
    giveup=lambda e: e.status != 429,
    factor=2,
)
async def fetch_apollo_data(session, url, headers, payload):
    """
    POST ``payload`` to ``url`` and return the decoded JSON response.

    Responses are cached under a SHA-256 digest of the URL plus the payload
    serialised with sorted keys, so identical requests are served from the
    cache.

    Parameters:
    - **session**: Session object whose ``post`` method is used as an async
      context manager (an ``aiohttp.ClientSession`` in the visible caller).
    - **url** (*str*): Endpoint to post to.
    - **headers** (*dict*): Request headers.
    - **payload** (*dict*): JSON-serialisable request body.

    Returns:
    - **dict**: The decoded JSON body of a 200 response (or the cached copy).

    Raises:
    - **aiohttp.ClientResponseError**: For HTTP 429 (retried by the backoff
      decorator) and for every other non-200 status.
    """
    key_data = f"{url}_{json.dumps(payload, sort_keys=True)}"
    key_hash = hashlib.sha256(key_data.encode()).hexdigest()
    cached_response = retrieve_output("fetch_apollo_data", key_hash)
    if cached_response:
        return cached_response

    async with session.post(url, headers=headers, json=payload) as response:
        if response.status == 200:
            result = await response.json()
            cache_output("fetch_apollo_data", key_hash, result)
            return result

        if response.status == 429:
            raise aiohttp.ClientResponseError(
                request_info=response.request_info,
                history=response.history,
                status=response.status,
                message="Rate limit exceeded",
                headers=response.headers
            )

        # Raises for 4xx/5xx responses.
        response.raise_for_status()

        # raise_for_status() does not raise for statuses below 400. Raise
        # explicitly so callers never receive an implicit None.
        raise aiohttp.ClientResponseError(
            request_info=response.request_info,
            history=response.history,
            status=response.status,
            message="Unexpected response status",
            headers=response.headers
        )
|
|
243
|
+
|
|
244
|
+
@assistant_tool
async def search_recent_job_changes(
    job_titles: List[str],
    locations: List[str],
    organization_num_employees_ranges: Optional[List[str]] = None,
    items_to_return: int = 100
) -> List[dict]:
    """
    Search for individuals with specified job titles, locations, and optionally organization employee ranges who have recently changed jobs using searchSignalIds.

    Pages through Apollo's people search until ``items_to_return`` people
    have been collected or a page comes back short or empty.

    Parameters:
    - **job_titles** (*List[str]*): List of job titles to search for.
    - **locations** (*List[str]*): List of locations to search in.
    - **organization_num_employees_ranges** (*Optional[List[str]]*, optional): List of employee ranges to filter organizations by (e.g., ["1,10", "11,50"]). Defaults to None.
    - **items_to_return** (*int*, optional): Total number of items to return. Defaults to 100.

    Returns:
    - **List[dict]**: List of individuals matching the criteria. On an HTTP
      error the list instead holds a single JSON string with the error details.

    Raises:
    - **EnvironmentError**: If the APOLLO_API_KEY environment variable is not set.
    """
    APOLLO_API_KEY = os.getenv('APOLLO_API_KEY')
    if not APOLLO_API_KEY:
        raise EnvironmentError("Apollo API key not found in environment variables")

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
        "X-Api-Key": f"{APOLLO_API_KEY}",
    }

    url = 'https://api.apollo.io/v1/mixed_people/search'

    # Define the search signal ID for recent job changes (e.g., "Job Change (90 Days)")
    search_signal_ids = ["643daa349293c1cdaa4d00f8"]

    # fetch_apollo_data already retries 429s with backoff. Cap the extra
    # sleep-and-retry rounds here so a persistent rate limit cannot loop forever.
    max_rate_limit_retries = 3
    rate_limit_retries = 0

    async with aiohttp.ClientSession() as session:
        results = []
        page = 1
        per_page = min(items_to_return, 100)  # Apollo API allows a maximum of 100 items per page

        while len(results) < items_to_return:
            payload = {
                "person_titles": job_titles,
                "person_locations": locations,
                "search_signal_ids": search_signal_ids,
                "page": page,
                "per_page": per_page
            }

            if organization_num_employees_ranges:
                payload["organization_num_employees_ranges"] = organization_num_employees_ranges

            try:
                data = await fetch_apollo_data(session, url, headers, payload)
            except aiohttp.ClientResponseError as e:
                if e.status == 429 and rate_limit_retries < max_rate_limit_retries:
                    rate_limit_retries += 1
                    await asyncio.sleep(30)  # Wait before retrying the same page
                    continue

                # Return error details as JSON string in an array
                error_details = {
                    'status': e.status,
                    'message': str(e),
                    'url': str(e.request_info.url),
                    'headers': dict(e.headers or {}),
                }
                return [json.dumps(error_details)]

            # A successful page resets the rate-limit budget.
            rate_limit_retries = 0

            people = data.get('people', [])
            if not people:
                break
            results.extend(people)
            if len(people) < per_page:
                # A short page means there is nothing further to fetch.
                break
            page += 1

        return results[:items_to_return]
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import aiohttp
|
|
3
|
+
from typing import Optional
|
|
4
|
+
import os
|
|
5
|
+
import aiohttp
|
|
6
|
+
import backoff
|
|
7
|
+
from dhisana.utils.cache_output import cache_output,retrieve_output
|
|
8
|
+
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def get_company_info_from_builtwith(
    company_domain: Optional[str] = None,
):
    """
    Fetch a company's technology details from BuiltWith using the company domain.

    Successful responses are cached per domain.

    Parameters:
    - **company_domain** (*str*, optional): Domain of the company.

    Returns:
    - **dict**: JSON response containing technology information, or
      ``{'error': ...}`` when the API key or domain is missing, or BuiltWith
      answers with a non-200, non-429 status.

    Raises:
    - **aiohttp.ClientResponseError**: On HTTP 429, so that the backoff
      decorator can retry the call.
    """
    BUILTWITH_API_KEY = os.environ.get('BUILTWITH_API_KEY')
    if not BUILTWITH_API_KEY:
        return {'error': "BuiltWith API key not found in environment variables"}

    if not company_domain:
        return {'error': "Company domain must be provided"}

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "accept": "application/json"
    }

    cached_response = retrieve_output("get_company_info_from_builtwith", company_domain)
    if cached_response:
        return cached_response

    url = 'https://api.builtwith.com/v19/api.json'
    # Pass the key and domain as params so aiohttp URL-encodes them; a
    # caller-supplied domain containing '&' or '#' cannot then alter the query.
    params = {'KEY': BUILTWITH_API_KEY, 'LOOKUP': company_domain}

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers, params=params) as response:
            if response.status == 200:
                result = await response.json()
                cache_output("get_company_info_from_builtwith", company_domain, result)
                return result

            if response.status == 429:
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message="Rate limit exceeded",
                    headers=response.headers
                )

            # The error body may not be JSON; report the decoding failure
            # instead of letting it escape.
            try:
                result = await response.json()
            except (aiohttp.ClientError, ValueError) as e:
                return {'error': f"Unexpected error: {str(e)}"}
            return {'error': result}
|
|
70
|
+
|
|
71
|
+
@assistant_tool
@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    max_tries=2,
    giveup=lambda e: e.status != 429,
    factor=10,
)
async def get_company_financials_from_builtwith(
    company_domain: Optional[str] = None,
):
    """
    Fetch a company's financial details from BuiltWith using the company domain.

    Successful responses are cached per domain.

    Parameters:
    - **company_domain** (*str*, optional): Domain of the company.

    Returns:
    - **dict**: JSON response containing financial information, or
      ``{'error': ...}`` when the API key or domain is missing, or BuiltWith
      answers with a non-200, non-429 status.

    Raises:
    - **aiohttp.ClientResponseError**: On HTTP 429, so that the backoff
      decorator can retry the call.
    """
    BUILTWITH_API_KEY = os.environ.get('BUILTWITH_API_KEY')
    if not BUILTWITH_API_KEY:
        return {'error': "BuiltWith API key not found in environment variables"}

    if not company_domain:
        return {'error': "Company domain must be provided"}

    headers = {
        "Content-Type": "application/json",
        "Cache-Control": "no-cache",
        "accept": "application/json"
    }

    cached_response = retrieve_output("get_company_financials_from_builtwith", company_domain)
    if cached_response:
        return cached_response

    url = 'https://api.builtwith.com/v19/financial.json'
    # Pass the key and domain as params so aiohttp URL-encodes them; a
    # caller-supplied domain containing '&' or '#' cannot then alter the query.
    params = {'KEY': BUILTWITH_API_KEY, 'LOOKUP': company_domain}

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers, params=params) as response:
            if response.status == 200:
                result = await response.json()
                cache_output("get_company_financials_from_builtwith", company_domain, result)
                return result

            if response.status == 429:
                raise aiohttp.ClientResponseError(
                    request_info=response.request_info,
                    history=response.history,
                    status=response.status,
                    message="Rate limit exceeded",
                    headers=response.headers
                )

            # The error body may not be JSON; report the decoding failure
            # instead of letting it escape.
            try:
                result = await response.json()
            except (aiohttp.ClientError, ValueError) as e:
                return {'error': f"Unexpected error: {str(e)}"}
            return {'error': result}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
CACHE_PATH = '/tmp/dhisana_ai/cache_run_outputs/'


def _cache_file_path(tool_name, key):
    """Return the cache file path for a (tool_name, key) pair.

    The writer and the reader both call this helper so the file name they
    use cannot drift apart.
    """
    key_hash = hashlib.sha256(key.encode()).hexdigest()
    return os.path.join(CACHE_PATH, f"{tool_name}_{key_hash}.json")


def cache_output(tool_name, key, value, ttl=None):
    """
    Cache the output of a function using the provided key and value.

    Parameters:
    tool_name (str): Name of the tool whose output is being cached.
    key (str): The cache key.
    value (Any): The value to be cached; must be JSON-serialisable.
    ttl (int, optional): The time-to-live (TTL) for the cached value in seconds.
        None (the default) means the entry never expires.

    Returns:
    bool: True if the value was successfully cached, False otherwise.
    """
    import time

    cache_data = {
        'value': value,
        'ttl': ttl,
        # Absolute expiry time, so the reader needs no file metadata.
        'expires_at': None if ttl is None else time.time() + ttl,
    }

    try:
        # Serialise before opening the file so an unserialisable value
        # cannot leave a truncated cache file behind.
        serialized = json.dumps(cache_data)
        # exist_ok avoids a race between an existence check and creation.
        os.makedirs(CACHE_PATH, exist_ok=True)
        with open(_cache_file_path(tool_name, key), 'w') as cache_file:
            cache_file.write(serialized)
        return True
    except (OSError, TypeError, ValueError):
        return False


def retrieve_output(tool_name, key):
    """
    Retrieve the cached output for a given tool and cache key.

    Parameters:
    tool_name (str): Name of the tool whose output is being retrieved.
    key (str): The cache key.

    Returns:
    Any: The cached value if found and not expired, None otherwise
        (including when the cache file is unreadable or malformed).
    """
    import time

    try:
        with open(_cache_file_path(tool_name, key), 'r') as cache_file:
            cache_data = json.load(cache_file)
    except (OSError, ValueError):
        # Missing or unreadable file, or contents that are not valid JSON.
        return None

    if not isinstance(cache_data, dict) or 'value' not in cache_data:
        return None

    # Entries without 'expires_at' (no TTL requested, or written before TTL
    # support existed) never expire.
    expires_at = cache_data.get('expires_at')
    if expires_at is not None and time.time() >= expires_at:
        return None

    return cache_data['value']
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
from dhisana.utils.assistant_tool_tag import assistant_tool
|
|
4
|
+
from dhisana.utils.built_with_api_tools import get_company_info_from_builtwith
|
|
5
|
+
from dhisana.utils.dataframe_tools import get_structured_output
|
|
6
|
+
from dhisana.utils.serpapi_search_tools import search_google
|
|
7
|
+
|
|
8
|
+
# Structured-output schema for the technology qualification verdict; it is
# passed to get_structured_output in find_tech_usage_in_company.
class QualifyCompanyBasedOnTechUsage(BaseModel):
    company_name: str = Field(..., description="Name of the company")
    # Declared as str, so the verdict is carried as text rather than a bool.
    is_company_qualified: str = Field(..., description="True if the company satisfies qualification criteria in input. False otherwise.")
    reason_for_qualification: str = Field(..., description="Reason for qualification")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@assistant_tool
async def find_tech_usage_in_company(
    company_domain: str,
    company_name: str,
    technology_to_look_for: str
):
    """
    Decide whether a company uses a given technology.

    Evidence is gathered from two Google searches (the company's own site
    and LinkedIn) and from BuiltWith, then passed to ``get_structured_output``
    for a verdict.

    Args:
        company_domain (str): The domain name of the company's website.
        company_name (str): The name of the company.
        technology_to_look_for (str): The technology to look for.

    Returns:
        str: A JSON string containing the structured output.
    """
    # Job postings on the company's own site that mention the technology.
    search_google_results = await search_google(
        f"site:{company_domain} {company_name} jobs or careers having {technology_to_look_for}",
        2,
    )

    # LinkedIn profiles or jobs tying the company to the technology.
    people_with_skills_results = await search_google(
        f"site:linkedin.com/in OR site:linkedin.com/jobs {company_name} having people with {technology_to_look_for} skills",
        2,
    )

    # Technologies BuiltWith reports for the domain, compared case-insensitively.
    builtwith_data = await get_company_info_from_builtwith(company_domain)
    wanted = technology_to_look_for.lower()
    tech_found_in_builtwith = False
    for tech_name in get_technologies(builtwith_data):
        if tech_name.lower() == wanted:
            tech_found_in_builtwith = True
            break

    # Prompt handed to the structured-output helper.
    prompt = f"""
    Mark the company as qualified in is_company_qualified if the company {company_name} is using technology {technology_to_look_for}.
    DO NOT make up information.
    Give reasoning why company is qualified based on one of the reasons:
    1. There is a job posting on the company website.
    2. There are people with that skill in the company.
    3. BuiltWith shows the company uses the tech.

    Google search on company careers:
    {search_google_results}

    Google search on LinkedIn for people with skills:
    {people_with_skills_results}

    BuiltWith shows technology used: {tech_found_in_builtwith}
    """

    structured, _ = await get_structured_output(prompt, QualifyCompanyBasedOnTechUsage)
    return json.dumps(structured.dict())
|
|
67
|
+
|
|
68
|
+
def get_technologies(data):
    """
    Extract the list of technologies from BuiltWith data.

    Only the first entry of ``Results`` and the first entry of its
    ``Result.Paths`` are inspected. Missing or null fields, and input that
    is not a dict, yield an empty list instead of raising.

    Args:
        data (dict): The data returned by BuiltWith API.

    Returns:
        List[str]: A list of technology names used by the company.
    """
    if not isinstance(data, dict):
        return []

    # JSON null arrives as None, so `or` supplies the empty default in both
    # the "key missing" and the "key present but null" cases.
    results = data.get('Results') or []
    if not results:
        return []

    first_result = results[0] or {}
    paths = (first_result.get('Result') or {}).get('Paths') or []
    if not paths:
        return []

    techs = (paths[0] or {}).get('Technologies') or []
    names = ((tech or {}).get('Name', '') for tech in techs)
    # Entries without a usable name are skipped.
    return [name for name in names if name]
|