dhisana 0.0.1.dev116__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +10 -1
- dhisana/schemas/sales.py +203 -22
- dhisana/utils/add_mapping.py +0 -2
- dhisana/utils/apollo_tools.py +739 -119
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/check_email_validity_tools.py +35 -18
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +1 -4
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +174 -35
- dhisana/utils/enrich_lead_information.py +183 -53
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +68 -23
- dhisana/utils/generate_email_response.py +294 -46
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +9 -2
- dhisana/utils/generate_linkedin_response_message.py +137 -66
- dhisana/utils/generate_structured_output_internal.py +317 -164
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +278 -54
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +718 -272
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +8 -6
- dhisana/utils/parse_linkedin_messages_txt.py +1 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +377 -76
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +3 -3
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +360 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +178 -18
- dhisana/utils/test_connect.py +1603 -130
- dhisana/utils/trasform_json.py +3 -3
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +1 -1
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev116.dist-info/RECORD +0 -83
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev116.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
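The dominant change in the search utilities diffed below is the replacement of direct SerpAPI calls with a pluggable router (`dhisana/utils/search_router.py`) that can dispatch to SerpAPI or Serper.dev. A minimal usage sketch, not code from the package: the list-of-dicts `tool_config` shape mirrors what the removed `get_serp_api_access_token` helper parses in the diff below, while the `serperdev` entry name is an assumption.

```python
# Hypothetical usage sketch, not code from the package. The tool_config shape
# mirrors what the removed get_serp_api_access_token parsed; the "serperdev"
# entry name is assumed, not confirmed by this diff.
import asyncio

from dhisana.utils.search_router import search_google_with_tools

tool_config = [
    {"name": "serpapi", "configuration": [{"name": "apiKey", "value": "<SERPAPI_KEY>"}]},
    {"name": "serperdev", "configuration": [{"name": "apiKey", "value": "<SERPER_KEY>"}]},  # assumed name
]

async def main() -> None:
    # Per the diff below, search results come back serialized as JSON strings.
    results = await search_google_with_tools("dhisana ai", 5, tool_config=tool_config)
    print(results[:1])

asyncio.run(main())
```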
@@ -1,385 +1,150 @@
 import json
-import os
 import re
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set
 from urllib.parse import urlparse
+import urllib.parse
 import aiohttp
 from bs4 import BeautifulSoup
 import urllib
+from pydantic import BaseModel
 
-from dhisana.utils.
-from dhisana.utils.
-
+from dhisana.utils.serperdev_search import search_google_serper
+from dhisana.utils.generate_structured_output_internal import (
+    get_structured_output_internal,
+)
 
 import logging
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+from dhisana.utils.search_router import search_google_with_tools
+from dhisana.utils.assistant_tool_tag import assistant_tool
 
-def get_serp_api_access_token(tool_config: Optional[List[Dict]] = None) -> str:
-    """
-    Retrieves the SERPAPI_KEY access token from the provided tool configuration.
-
-    Args:
-        tool_config (list): A list of dictionaries containing the tool configuration.
-            Each dictionary should have a "name" key and a "configuration" key,
-            where "configuration" is a list of dictionaries containing "name" and "value" keys.
-
-    Returns:
-        str: The SERPAPI_KEY access token.
-
-    Raises:
-        ValueError: If the access token is not found in the tool configuration or environment variable.
-    """
-    logger.info("Entering get_serp_api_access_token")
-    SERPAPI_KEY = None
-
-    if tool_config:
-        logger.debug(f"Tool config provided: {tool_config}")
-        serpapi_config = next(
-            (item for item in tool_config if item.get("name") == "serpapi"), None
-        )
-        if serpapi_config:
-            config_map = {
-                item["name"]: item["value"]
-                for item in serpapi_config.get("configuration", [])
-                if item
-            }
-            SERPAPI_KEY = config_map.get("apiKey")
-        else:
-            logger.warning("No 'serpapi' config item found in tool_config.")
-    else:
-        logger.debug("No tool_config provided or it's None.")
-
-    SERPAPI_KEY = SERPAPI_KEY or os.getenv("SERPAPI_KEY")
-    if not SERPAPI_KEY:
-        logger.error("SERPAPI_KEY not found in configuration or environment.")
-        raise ValueError("SERPAPI_KEY access token not found in tool_config or environment variable")
-
-    logger.info("Retrieved SERPAPI_KEY successfully.")
-    return SERPAPI_KEY
-
-
-@assistant_tool
-async def search_google(
-    query: str,
-    number_of_results: int = 10,
-    offset: int = 0,
-    tool_config: Optional[List[Dict]] = None,
-    as_oq: Optional[str] = None  # <-- NEW PARAM for optional keywords
-) -> List[str]:
-    """
-    Search Google using SERP API, supporting pagination and an explicit 'offset'
-    parameter to start from a specific result index.
-    Now also supports 'as_oq' for optional query terms in SERP API.
-
-    Parameters:
-    - query (str): The search query.
-    - number_of_results (int): The total number of results to return. Default is 10.
-    - offset (int): The starting index for the first result returned (Google pagination).
-    - tool_config (Optional[List[Dict]]): Configuration containing SERP API token, etc.
-    - as_oq (Optional[str]): Optional query terms for SerpAPI (if supported).
-
-    Returns:
-    - List[str]: A list of organic search results, each serialized as a JSON string.
-    """
-    logger.info("Entering search_google")
-    if not query:
-        logger.warning("Empty query string provided.")
-        return []
-
-    # Use 'as_oq' in the cache key too, so different optional terms don't conflict
-    cache_key = f"{query}_{number_of_results}_{offset}_{as_oq or ''}"
-    cached_response = retrieve_output("search_google_serp", cache_key)
-    if cached_response is not None:
-        logger.info("Cache hit for search_google.")
-        return cached_response
-
-    SERPAPI_KEY = get_serp_api_access_token(tool_config)
-    url = "https://serpapi.com/search"
-
-    page_size = 100
-    all_results: List[Dict[str, Any]] = []
-    start_index = offset
-
-    logger.debug(f"Requesting up to {number_of_results} results for '{query}' starting at offset {offset}.")
-
-    async with aiohttp.ClientSession() as session:
-        while len(all_results) < number_of_results:
-            to_fetch = min(page_size, number_of_results - len(all_results))
-            params = {
-                "q": query,
-                "num": to_fetch,
-                "start": start_index,
-                "api_key": SERPAPI_KEY,
-                "engine": "google",
-                "location": "United States"
-            }
-
-            # If we have optional terms, add them
-            if as_oq:
-                params["as_oq"] = as_oq
-
-            logger.debug(f"SERP API GET request with params: {params}")
-
-            try:
-                async with session.get(url, params=params) as response:
-                    logger.debug(f"Received response status: {response.status}")
-                    if response.status != 200:
-                        try:
-                            error_content = await response.json()
-                        except Exception:
-                            error_content = await response.text()
-                        logger.warning(f"Non-200 response from SERP API: {error_content}")
-                        return [json.dumps({"error": error_content})]
-
-                    result = await response.json()
-            except Exception as e:
-                logger.exception("Exception during SERP API request.")
-                return [json.dumps({"error": str(e)})]
-
-            organic_results = result.get('organic_results', [])
-            if not organic_results:
-                logger.debug("No more organic results returned; stopping.")
-                break
-
-            all_results.extend(organic_results)
-            start_index += to_fetch
-
-            if len(all_results) >= number_of_results:
-                break
-
-    all_results = all_results[:number_of_results]
-    logger.info(f"Found {len(all_results)} results for query '{query}'.")
-
-    serialized_results = [json.dumps(item) for item in all_results]
-    cache_output("search_google_serp", cache_key, serialized_results)
-    return serialized_results
-
-
-@assistant_tool
-async def search_google_maps(
-    query: str,
-    number_of_results: int = 3,
-    tool_config: Optional[List[Dict]] = None
-) -> List[str]:
-    """
-    Search Google Maps using SERP API and return the results as an array of serialized JSON strings.
-
-    Parameters:
-    - query (str): The search query.
-    - number_of_results (int): The number of results to return.
-    """
-    logger.info("Entering search_google_maps")
-    if not query:
-        logger.warning("Empty query string provided for search_google_maps.")
-        return []
-
-    SERPAPI_KEY = get_serp_api_access_token(tool_config)
-    params = {
-        "q": query,
-        "num": number_of_results,
-        "api_key": SERPAPI_KEY,
-        "engine": "google_maps"
-    }
-    url = "https://serpapi.com/search"
-
-    logger.debug(f"Searching Google Maps with params: {params}")
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, params=params) as response:
-                logger.debug(f"Received status: {response.status}")
-                result = await response.json()
-                if response.status != 200:
-                    logger.warning(f"Non-200 response from SERP API: {result}")
-                    return [json.dumps({"error": result})]
-
-                serialized_results = [json.dumps(item) for item in result.get('local_results', [])]
-                logger.info(f"Returning {len(serialized_results)} map results.")
-                return serialized_results
-    except Exception as e:
-        logger.exception("Exception during search_google_maps request.")
-        return [json.dumps({"error": str(e)})]
-
+from dhisana.utils.web_download_parse_tools import fetch_html_content
 
-@assistant_tool
-async def search_google_news(
-    query: str,
-    number_of_results: int = 3,
-    tool_config: Optional[List[Dict]] = None
-) -> List[str]:
-    """
-    Search Google News using SERP API and return the results as an array of serialized JSON strings.
-
-    Parameters:
-    - query (str): The search query.
-    - number_of_results (int): The number of results to return.
-    """
-    logger.info("Entering search_google_news")
-    if not query:
-        logger.warning("Empty query string provided for search_google_news.")
-        return []
 
-    SERPAPI_KEY = get_serp_api_access_token(tool_config)
-    params = {
-        "q": query,
-        "num": number_of_results,
-        "api_key": SERPAPI_KEY,
-        "engine": "google_news"
-    }
-    url = "https://serpapi.com/search"
+class LeadSearchResult(BaseModel):
+    first_name: str = ""
+    last_name: str = ""
+    full_name: str = ""
+    job_title: str = ""
+    linkedin_follower_count: int = 0
+    lead_location: str = ""
+    summary_about_lead: str = ""
+    user_linkedin_url: str = ""
 
-    logger.debug(f"Searching Google News with params: {params}")
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, params=params) as response:
-                logger.debug(f"Received status: {response.status}")
-                result = await response.json()
-                if response.status != 200:
-                    logger.warning(f"Non-200 response from SERP API: {result}")
-                    return [json.dumps({"error": result})]
-
-                serialized_results = [json.dumps(item) for item in result.get('news_results', [])]
-                logger.info(f"Returning {len(serialized_results)} news results.")
-                return serialized_results
-    except Exception as e:
-        logger.exception("Exception during search_google_news request.")
-        return [json.dumps({"error": str(e)})]
 
+class LinkedinCandidateChoice(BaseModel):
+    chosen_link: str = ""
+    confidence: float = 0.0
+    reasoning: str = ""
 
-@assistant_tool
-async def search_job_postings(
-    query: str,
-    number_of_results: int,
-    tool_config: Optional[List[Dict]] = None
-) -> List[str]:
-    """
-    Search for job postings using SERP API and return the results as an array of serialized JSON strings.
-
-    Parameters:
-    - query (str): The search query.
-    - number_of_results (int): The number of results to return.
-    """
-    logger.info("Entering search_job_postings")
-    if not query:
-        logger.warning("Empty query string provided for search_job_postings.")
-        return []
 
-    SERPAPI_KEY = get_serp_api_access_token(tool_config)
-    params = {
-        "q": query,
-        "num": number_of_results,
-        "api_key": SERPAPI_KEY,
-        "engine": "google_jobs"
-    }
-    url = "https://serpapi.com/search"
+async def get_structured_output(text: str, tool_config: Optional[List[Dict]] = None) -> LeadSearchResult:
+    """Parse text snippet into ``LeadSearchResult`` using OpenAI."""
 
-    logger.debug(f"Searching Google Jobs with params: {params}")
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, params=params) as response:
-                logger.debug(f"Received status: {response.status}")
-                result = await response.json()
-                if response.status != 200:
-                    logger.warning(f"Non-200 response from SERP API: {result}")
-                    return [json.dumps({"error": result})]
-
-                serialized_results = [json.dumps(item) for item in result.get('jobs_results', [])]
-                logger.info(f"Returning {len(serialized_results)} job results.")
-                return serialized_results
-    except Exception as e:
-        logger.exception("Exception during search_job_postings request.")
-        return [json.dumps({"error": str(e)})]
+    prompt = (
+        "Extract lead details from the text below.\n"
+        "If follower counts are mentioned, convert values like '1.5k+ followers' to an integer (e.g. 1500).\n"
+        f"Return JSON matching this schema:\n{json.dumps(LeadSearchResult.model_json_schema(), indent=2)}\n\n"
+        f"Text:\n{text}"
+    )
+    result, status = await get_structured_output_internal(
+        prompt, LeadSearchResult, model="gpt-5.1-chat", tool_config=tool_config
+    )
+    if status != "SUCCESS" or result is None:
+        return LeadSearchResult()
+    return result
 
 
 @assistant_tool
-async def search_google_images(
-    query: str,
-    number_of_results: int,
-    tool_config: Optional[List[Dict]] = None
-) -> List[str]:
-    """
-    Search Google Images using SERP API and return the results as an array of serialized JSON strings.
-
-    Parameters:
-    - query (str): The search query.
-    - number_of_results (int): The number of results to return.
-    """
-    logger.info("Entering search_google_images")
-    if not query:
-        logger.warning("Empty query string provided for search_google_images.")
-        return []
-
-    SERPAPI_KEY = get_serp_api_access_token(tool_config)
-    params = {
-        "q": query,
-        "num": number_of_results,
-        "api_key": SERPAPI_KEY,
-        "engine": "google_images"
-    }
-    url = "https://serpapi.com/search"
-
-    logger.debug(f"Searching Google Images with params: {params}")
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, params=params) as response:
-                logger.debug(f"Received status: {response.status}")
-                result = await response.json()
-                if response.status != 200:
-                    logger.warning(f"Non-200 response from SERP API: {result}")
-                    return [json.dumps({"error": result})]
-
-                serialized_results = [json.dumps(item) for item in result.get('images_results', [])]
-                logger.info(f"Returning {len(serialized_results)} image results.")
-                return serialized_results
-    except Exception as e:
-        logger.exception("Exception during search_google_images request.")
-        return [json.dumps({"error": str(e)})]
-
+async def find_user_linkedin_url_with_serper(
+    user_linkedin_url: str,
+    tool_config: Optional[List[Dict]] = None,
+) -> Optional[Dict]:
+    """Search Google via Serper.dev for ``user_linkedin_url`` and parse lead details."""
 
-@assistant_tool
-async def search_google_videos(
-    query: str,
-    number_of_results: int,
-    tool_config: Optional[List[Dict]] = None
-) -> List[str]:
-    """
-    Search Google Videos using SERP API and return the results as an array of serialized JSON strings.
-
-    Parameters:
-    - query (str): The search query.
-    - number_of_results (int): The number of results to return.
-    """
-    logger.info("Entering search_google_videos")
-    if not query:
-        logger.warning("Empty query string provided for search_google_videos.")
-        return []
+    if not user_linkedin_url:
+        return None
 
-    SERPAPI_KEY = get_serp_api_access_token(tool_config)
-    params = {
-        "q": query,
-        "num": number_of_results,
-        "api_key": SERPAPI_KEY,
-        "engine": "google_videos"
-    }
-    url = "https://serpapi.com/search"
+    normalized_input = extract_user_linkedin_page(user_linkedin_url)
+    results = await search_google_serper(user_linkedin_url, 10, tool_config=tool_config)
+    for item_json in results:
+        try:
+            item = json.loads(item_json)
+        except Exception:
+            continue
+        link = item.get("link", "")
+        if not link:
+            continue
+        if extract_user_linkedin_page(link) == normalized_input:
+            text = " ".join(
+                [item.get("title", ""), item.get("subtitle", ""), item.get("snippet", "")]
+            ).strip()
+            structured = await get_structured_output(text, tool_config=tool_config)
+            structured.user_linkedin_url = normalized_input
+            return json.loads(structured.model_dump_json())
+    return None
+
+
+async def pick_best_linkedin_candidate_with_llm(
+    email: str,
+    user_name: str,
+    user_title: str,
+    user_location: str,
+    user_company: str,
+    candidates: List[Dict],
+    tool_config: Optional[List[Dict]] = None,
+) -> Optional[LinkedinCandidateChoice]:
+    """Ask the LLM to assess candidate LinkedIn URLs and pick the best match."""
+
+    if not candidates:
+        return None
+
+    candidates_sorted = candidates[-3:]
+    candidate_lines = []
+    for idx, candidate in enumerate(candidates_sorted, start=1):
+        candidate_lines.append(
+            "\n".join(
+                [
+                    f"Candidate {idx}:",
+                    f"  Link: {candidate.get('link', '')}",
+                    f"  Title: {candidate.get('title', '')}",
+                    f"  Snippet: {candidate.get('snippet', '')}",
+                    f"  Subtitle: {candidate.get('subtitle', '')}",
+                    f"  Query: {candidate.get('query', '')}",
+                ]
+            )
+        )
 
-    logger.debug(f"Searching Google Videos with params: {params}")
-    try:
-        async with aiohttp.ClientSession() as session:
-            async with session.get(url, params=params) as response:
-                logger.debug(f"Received status: {response.status}")
-                result = await response.json()
-                if response.status != 200:
-                    logger.warning(f"Non-200 response from SERP API: {result}")
-                    return [json.dumps({"error": result})]
-
-                serialized_results = [json.dumps(item) for item in result.get('video_results', [])]
-                logger.info(f"Returning {len(serialized_results)} video results.")
-                return serialized_results
-    except Exception as e:
-        logger.exception("Exception during search_google_videos request.")
-        return [json.dumps({"error": str(e)})]
+    prompt = (
+        "You are validating LinkedIn profile matches for a lead enrichment workflow.\n"
+        "Given the lead context and candidate search results, pick the most likely LinkedIn profile.\n"
+        "If no candidate seems appropriate, return an empty link and confidence 0.\n"
+        "Consider whether the email, name, company, title, or location aligns with the candidate.\n"
+        "Lead context:\n"
+        f"- Email: {email or 'unknown'}\n"
+        f"- Name: {user_name or 'unknown'}\n"
+        f"- Title: {user_title or 'unknown'}\n"
+        f"- Company: {user_company or 'unknown'}\n"
+        f"- Location: {user_location or 'unknown'}\n\n"
+        "Candidates:\n"
+        f"{chr(10).join(candidate_lines)}\n\n"
+        "Return JSON with fields: chosen_link (string), confidence (0-1 float), reasoning (short string)."
+    )
+
+    result, status = await get_structured_output_internal(
+        prompt,
+        LinkedinCandidateChoice,
+        model="gpt-5.1-chat",
+        tool_config=tool_config,
+    )
+
+    if status != "SUCCESS" or result is None:
+        return None
+
+    return result
 
 
 @assistant_tool
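The hunk above also introduces typed result models: where the old SerpAPI wrappers returned raw JSON strings, the new `find_user_linkedin_url_with_serper` feeds title/subtitle/snippet text through `get_structured_output_internal` into a `LeadSearchResult`. A self-contained sketch of the output shape that flow produces; the field values are illustrative, not from the package.

```python
from pydantic import BaseModel

class LeadSearchResult(BaseModel):
    # Same fields and defaults as the model added in the hunk above.
    first_name: str = ""
    last_name: str = ""
    full_name: str = ""
    job_title: str = ""
    linkedin_follower_count: int = 0
    lead_location: str = ""
    summary_about_lead: str = ""
    user_linkedin_url: str = ""

# get_structured_output() asks the LLM to map snippet text such as
# "Jane Doe - VP Marketing - Acme | San Francisco | 1.5k+ followers"
# onto this schema, e.g. (illustrative values):
parsed = LeadSearchResult(
    full_name="Jane Doe",
    job_title="VP Marketing",
    lead_location="San Francisco",
    linkedin_follower_count=1500,  # "1.5k+ followers" normalized per the prompt
)
print(parsed.model_dump_json())
```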
@@ -389,14 +154,7 @@ async def get_company_domain_from_google_search(
     tool_config: Optional[List[Dict]] = None
 ) -> str:
     """
-    Tries to find the company domain from the company name using Google
-
-    Args:
-        company_name (str): The name of the company to search for.
-        location (str, optional): A location to include in the query.
-
-    Returns:
-        str: The domain of the company's official website if found, otherwise an empty string.
+    Tries to find the company domain from the company name using Google (SerpAPI or Serper.dev).
     """
     logger.info("Entering get_company_domain_from_google_search")
 
@@ -405,22 +163,21 @@ async def get_company_domain_from_google_search(
         logger.debug("Invalid or excluded company_name provided.")
         return ""
 
-    exclude_company_names = ["linkedin", "wikipedia", "facebook", "instagram", "twitter", "youtube", "netflix", "zoominfo", "reditt"]
     query = f"\"{company_name}\" official website"
     if location:
         query = f"\"{company_name}\" official website, {location}"
 
     try:
         logger.debug(f"Performing search with query: {query}")
-        result = await
+        result = await search_google_with_tools(query, 1, tool_config=tool_config)
         if not isinstance(result, list) or len(result) == 0:
             logger.debug("No results for first attempt, retrying with fallback query.")
             query = f"{company_name} official website"
-            result = await
+            result = await search_google_with_tools(query, 1, tool_config=tool_config)
             if not isinstance(result, list) or len(result) == 0:
                 logger.debug("No results from fallback query either.")
                 return ''
-    except Exception
+    except Exception:
         logger.exception("Exception during get_company_domain_from_google_search.")
         return ''
 
@@ -472,16 +229,6 @@ async def get_signal_strength(
     """
     Find how strong a match for the keywords in search is by checking
     how many search results contain all desired keywords in the snippet.
-
-    Args:
-        domain_to_search (str): The domain to search inside.
-        keywords (List[str]): The keywords to search for.
-        in_title (List[str]): Keywords that must appear in the title.
-        not_in_title (List[str]): Keywords that must not appear in the title.
-        negative_keywords (List[str]): Keywords to exclude from results.
-
-    Returns:
-        int: A strength score on a scale of 0 to 5.
     """
     logger.info("Entering get_signal_strength")
 
@@ -508,8 +255,8 @@ async def get_signal_strength(
 
     logger.debug(f"Performing get_signal_strength search with query: {final_query}")
     try:
-        results = await
-    except Exception
+        results = await search_google_with_tools(final_query, 5, tool_config=tool_config)
+    except Exception:
         logger.exception("Exception occurred while searching for signal strength.")
         return 0
 
@@ -518,9 +265,9 @@ async def get_signal_strength(
         return 0
 
     score = 0
-    for
+    for result_item in results:
         try:
-            result_json = json.loads(
+            result_json = json.loads(result_item)
             snippet_text = result_json.get('snippet', '').lower()
             if all(kw.lower() in snippet_text for kw in keywords):
                 logger.debug(f"Found match in snippet: {snippet_text[:60]}...")
@@ -544,8 +291,8 @@ def extract_user_linkedin_page(url: str) -> str:
     if not url:
         return ""
 
-    normalized_url = re.sub(r"(https?://)?([\w\-]+\.)?linkedin\.com", "https://www.linkedin.com", url)
-    match = re.match(r"https://www
+    normalized_url = re.sub(r"^(https?://)?([\w\-]+\.)?linkedin\.com", "https://www.linkedin.com", url)
+    match = re.match(r"https://www\.linkedin\.com/in/([^/?#]+)", normalized_url)
     if match:
         page = f"https://www.linkedin.com/in/{match.group(1)}"
         logger.debug(f"Extracted user LinkedIn page: {page}")
@@ -567,16 +314,6 @@ async def find_user_linkedin_url_google(
 ) -> str:
     """
     Find the LinkedIn URL for a user based on their name, title, location, and company.
-
-    Args:
-        user_name (str): The name of the user.
-        user_title (str): The title of the user.
-        user_location (str): The location of the user.
-        user_company (str): The company of the user.
-        use_strict_check (bool): Whether to use a strict single query or a series of relaxed queries.
-
-    Returns:
-        str: The LinkedIn URL if found, otherwise an empty string.
     """
     logger.info("Entering find_user_linkedin_url_google")
 
@@ -596,14 +333,14 @@ async def find_user_linkedin_url_google(
         f'site:linkedin.com/in "{user_name}" intitle:"{user_name}"'
     ]
 
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession() as session:
         for query in queries:
             if not query.strip():
                 continue
             logger.debug(f"Searching with query: {query}")
             try:
-                results = await
-            except Exception
+                results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
+            except Exception:
                 logger.exception("Error searching for LinkedIn user URL.")
                 continue
 
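The `extract_user_linkedin_page` change above is subtle: the substitution regex gains a `^` anchor and the slug capture now stops at `/`, `?`, or `#`. A self-contained paraphrase of the updated helper to show the effect; the real function also logs, and its no-match fallback is assumed here to be an empty string.

```python
import re

def extract_user_linkedin_page(url: str) -> str:
    # Paraphrase of the updated helper in the hunk above; logging omitted and
    # the no-match return value assumed to be "".
    if not url:
        return ""
    normalized = re.sub(r"^(https?://)?([\w\-]+\.)?linkedin\.com",
                        "https://www.linkedin.com", url)
    match = re.match(r"https://www\.linkedin\.com/in/([^/?#]+)", normalized)
    return f"https://www.linkedin.com/in/{match.group(1)}" if match else ""

# Regional hosts, query strings, and fragments now normalize cleanly:
assert extract_user_linkedin_page("https://in.linkedin.com/in/jane-doe?trk=x") == \
    "https://www.linkedin.com/in/jane-doe"
# The "^" anchor also means a linkedin.com occurrence later in the string is
# no longer rewritten, which the old unanchored re.sub could do.
```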
@@ -632,6 +369,221 @@ async def find_user_linkedin_url_google(
     return ""
 
 
+@assistant_tool
+async def find_user_linkedin_url_by_email_google(
+    email: str,
+    user_name: str = "",
+    user_title: str = "",
+    user_location: str = "",
+    user_company: str = "",
+    tool_config: Optional[List[Dict]] = None,
+) -> Optional[Dict[str, Any]]:
+    """
+    Find the LinkedIn URL for a user based primarily on their email address.
+
+    Additional profile hints (name, title, location, company) improve query precision
+    when supplied. Returns a dict with the best LinkedIn URL, LLM confidence score,
+    and short reasoning when a match clears the confidence threshold; otherwise ``None``.
+    """
+    logger.info("Entering find_user_linkedin_url_by_email_google")
+
+    if not email:
+        logger.warning("No email provided.")
+        return None
+
+    normalized_email = email.strip().lower()
+    email_local_part = normalized_email.split("@")[0] if "@" in normalized_email else normalized_email
+    email_local_humanized = re.sub(r"[._-]+", " ", email_local_part).strip()
+
+    queries: List[str] = []
+
+    def add_query(query: str) -> None:
+        query = query.strip()
+        if query and query not in queries:
+            queries.append(query)
+
+    def add_query_parts(*parts: str) -> None:
+        tokens = [part.strip() for part in parts if part and part.strip()]
+        if not tokens:
+            return
+        add_query(" ".join(tokens))
+
+    enriched_terms = []
+    if user_name:
+        enriched_terms.append(f'"{user_name}"')
+    if user_company:
+        enriched_terms.append(f'"{user_company}"')
+    if user_title:
+        enriched_terms.append(f'"{user_title}"')
+    if user_location:
+        enriched_terms.append(f'"{user_location}"')
+    base_hint = " ".join(enriched_terms)
+
+    # Prioritise the direct email search variants before broader fallbacks.
+    add_query_parts(normalized_email, "linkedin.com/in", base_hint)
+    add_query_parts(normalized_email, "linkedin.com", base_hint)
+    add_query_parts(normalized_email, "linkedin", base_hint)
+    add_query_parts(normalized_email, base_hint)
+    add_query(f'"{normalized_email}" "linkedin.com/in" {base_hint}')
+    add_query(f'"{normalized_email}" "linkedin.com" {base_hint}')
+    add_query(f'"{normalized_email}" linkedin {base_hint}')
+
+    if email_local_part and email_local_part != normalized_email:
+        add_query_parts(email_local_part, "linkedin.com/in", base_hint)
+        add_query_parts(email_local_part, "linkedin.com", base_hint)
+        add_query_parts(email_local_part, "linkedin", base_hint)
+        add_query(f'"{email_local_part}" "linkedin.com/in" {base_hint}')
+        add_query(f'"{email_local_part}" "linkedin.com" {base_hint}')
+
+    if email_local_humanized and email_local_humanized not in {email_local_part, normalized_email}:
+        add_query_parts(email_local_humanized, "linkedin", base_hint)
+        add_query(f'"{email_local_humanized}" linkedin {base_hint}')
+
+    if normalized_email:
+        add_query(f'site:linkedin.com/in "{normalized_email}" {base_hint}')
+
+    if email_local_part:
+        add_query(f'site:linkedin.com/in "{email_local_part}" {base_hint}')
+
+    if email_local_humanized and email_local_humanized != email_local_part:
+        add_query(f'site:linkedin.com/in "{email_local_humanized}" {base_hint}')
+
+    if base_hint:
+        lookup_hint = user_name or email_local_humanized or email_local_part or normalized_email
+        add_query(
+            f'site:linkedin.com/in "{normalized_email}" {base_hint} '
+            f'intitle:"{lookup_hint}" -intitle:"profiles"'
+        )
+        if email_local_humanized:
+            add_query(
+                f'site:linkedin.com/in "{email_local_humanized}" {base_hint} '
+                f'intitle:"{lookup_hint}" -intitle:"profiles"'
+            )
+
+    candidate_records: List[Dict[str, str]] = []
+    seen_links: Set[str] = set()
+    best_llm_choice: Optional[LinkedinCandidateChoice] = None
+    best_llm_link: str = ""
+    HIGH_CONFIDENCE_THRESHOLD = 0.8
+    MIN_CONFIDENCE_THRESHOLD = 0.75
+
+    async def evaluate_with_llm() -> Optional[LinkedinCandidateChoice]:
+        nonlocal best_llm_choice, best_llm_link
+
+        llm_choice = await pick_best_linkedin_candidate_with_llm(
+            email=email,
+            user_name=user_name,
+            user_title=user_title,
+            user_location=user_location,
+            user_company=user_company,
+            candidates=candidate_records,
+            tool_config=tool_config,
+        )
+
+        if not llm_choice or not llm_choice.chosen_link:
+            return None
+
+        chosen_link = extract_user_linkedin_page(llm_choice.chosen_link)
+        if not chosen_link:
+            return None
+
+        llm_choice.chosen_link = chosen_link
+
+        if best_llm_choice is None or llm_choice.confidence > best_llm_choice.confidence:
+            best_llm_choice = llm_choice
+            best_llm_link = chosen_link
+            logger.debug(
+                "LLM updated best candidate: %s (confidence %.2f) reason: %s",
+                chosen_link,
+                llm_choice.confidence,
+                llm_choice.reasoning,
+            )
+
+        if llm_choice.confidence >= HIGH_CONFIDENCE_THRESHOLD:
+            logger.info(
+                "Returning LinkedIn user page by email via LLM scoring: %s (confidence %.2f)",
+                chosen_link,
+                llm_choice.confidence,
+            )
+            return llm_choice
+
+        return None
+
+    async with aiohttp.ClientSession() as session:
+        for query in queries:
+            query = query.strip()
+            if not query:
+                continue
+            logger.debug(f"Searching with query: {query}")
+
+            try:
+                results = await search_google_with_tools(query, 5, tool_config=tool_config)
+            except Exception:
+                logger.exception("Error searching for LinkedIn user URL by email.")
+                continue
+
+            if not isinstance(results, list) or len(results) == 0:
+                logger.debug("No results for this query, moving to next.")
+                continue
+
+            for result_item in results:
+                try:
+                    result_json = json.loads(result_item)
+                except (json.JSONDecodeError, IndexError):
+                    logger.debug("Failed to parse JSON from the search result.")
+                    continue
+
+                link = result_json.get('link', '')
+                if not link:
+                    continue
+
+                parsed_url = urlparse(link)
+                if 'linkedin.com/in' in (parsed_url.netloc + parsed_url.path):
+                    link = extract_user_linkedin_page(link)
+                    if not link or link in seen_links:
+                        continue
+
+                    title = result_json.get('title', '')
+                    snippet = result_json.get('snippet', '')
+                    subtitle = result_json.get('subtitle', '')
+
+                    candidate_records.append(
+                        {
+                            "link": link,
+                            "title": title,
+                            "snippet": snippet,
+                            "subtitle": subtitle,
+                            "query": query,
+                        }
+                    )
+                    if len(candidate_records) > 6:
+                        candidate_records.pop(0)
+                    seen_links.add(link)
+
+                    high_conf_choice = await evaluate_with_llm()
+                    if high_conf_choice:
+                        return {
+                            "linkedin_url": high_conf_choice.chosen_link,
+                            "confidence": high_conf_choice.confidence,
+                            "reasoning": high_conf_choice.reasoning,
+                        }
+
+    if best_llm_choice and best_llm_link and best_llm_choice.confidence >= MIN_CONFIDENCE_THRESHOLD:
+        logger.info(
+            "Returning LinkedIn user page by email via LLM scoring (best overall): %s (confidence %.2f)",
+            best_llm_link,
+            best_llm_choice.confidence,
+        )
+        return {
+            "linkedin_url": best_llm_link,
+            "confidence": best_llm_choice.confidence,
+            "reasoning": best_llm_choice.reasoning,
+        }
+
+    logger.info("No matching LinkedIn user page found using email queries.")
+    return None
+
+
 @assistant_tool
 async def find_user_linkedin_url_by_job_title_google(
     user_title: str,
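Callers get back a plain dict from the new email lookup when a candidate clears the confidence bar. A hypothetical invocation; the module path and credentials are assumptions, while the signature and return keys come from the hunk above.

```python
# Hypothetical invocation; module path and credentials are assumptions.
import asyncio

from dhisana.utils.serpapi_search_tools import (  # assumed module path
    find_user_linkedin_url_by_email_google,
)

tool_config = [
    {"name": "serpapi", "configuration": [{"name": "apiKey", "value": "<SERPAPI_KEY>"}]},
]

async def main() -> None:
    match = await find_user_linkedin_url_by_email_google(
        email="jane.doe@acme.com",
        user_name="Jane Doe",
        user_company="Acme",
        tool_config=tool_config,
    )
    # Returns None, or per the hunk above:
    # {"linkedin_url": "...", "confidence": 0.82, "reasoning": "..."}
    if match:
        print(match["linkedin_url"], match["confidence"])

asyncio.run(main())
```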
@@ -641,14 +593,6 @@ async def find_user_linkedin_url_by_job_title_google(
 ) -> str:
     """
     Find the LinkedIn URL for a user based on their job_title, location, and company.
-
-    Args:
-        user_title (str): The title of the user.
-        user_location (str): The location of the user.
-        user_company (str): The company of the user.
-
-    Returns:
-        str: The LinkedIn URL if found, otherwise an empty string.
     """
     logger.info("Entering find_user_linkedin_url_by_job_title_google")
 
@@ -656,15 +600,15 @@ async def find_user_linkedin_url_by_job_title_google(
         f'site:linkedin.com/in "{user_company}" AND "{user_title}" -intitle:"profiles" ',
     ]
 
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession() as session:
         for query in queries:
             if not query.strip():
                 continue
             logger.debug(f"Searching with query: {query}")
 
             try:
-                results = await
-            except Exception
+                results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
+            except Exception:
                 logger.exception("Error searching for LinkedIn URL by job title.")
                 continue
 
@@ -701,14 +645,6 @@ async def find_user_linkedin_url_by_google_search(
 ) -> List[str]:
     """
     Find LinkedIn user URLs based on provided Google search queries.
-
-    Args:
-        queries (List[str]): A list of Google search queries.
-        number_of_results (int): Number of results to return from each query (default is 5).
-        tool_config (Optional[List[Dict]]): Optional configuration for the SERP API.
-
-    Returns:
-        List[str]: A list of matching LinkedIn user URLs found, or an empty list if none.
     """
     logger.info("Entering find_user_linkedin_url_by_google_search")
     found_urls = []
@@ -719,8 +655,8 @@ async def find_user_linkedin_url_by_google_search(
         logger.debug(f"Searching with query: {query}")
 
         try:
-            results = await
-        except Exception
+            results = await search_google_with_tools(query.strip(), number_of_results, tool_config=tool_config)
+        except Exception:
             logger.exception("Error searching for LinkedIn URL using Google search.")
             continue
 
@@ -780,14 +716,6 @@ async def find_organization_linkedin_url_with_google_search(
 ) -> str:
     """
     Find the LinkedIn URL for a company based on its name and optional location using Google search.
-
-    Args:
-        company_name (str): The name of the company.
-        company_location (str, optional): The location of the company.
-        use_strict_check (bool): Whether to use stricter or multiple queries.
-
-    Returns:
-        str: The LinkedIn URL if found, otherwise an empty string.
     """
     logger.info("Entering find_organization_linkedin_url_with_google_search")
 
@@ -796,7 +724,7 @@ async def find_organization_linkedin_url_with_google_search(
         return ""
 
     if use_strict_check:
-        queries = [f'site:linkedin.com/company "{company_name}" {company_domain}
+        queries = [f'site:linkedin.com/company "{company_name}" {company_domain} ']
     else:
         if company_location:
             queries = [
@@ -817,8 +745,8 @@ async def find_organization_linkedin_url_with_google_search(
 
         logger.debug(f"Searching with query: {query}")
         try:
-            results = await
-        except Exception
+            results = await search_google_with_tools(query.strip(), 1, tool_config=tool_config)
+        except Exception:
             logger.exception("Error searching for organization LinkedIn URL.")
             continue
 
@@ -871,7 +799,7 @@ async def get_external_links(url: str) -> List[str]:
             else:
                 logger.warning(f"Non-200 status ({response.status}) while fetching external links.")
                 return []
-    except Exception
+    except Exception:
         logger.exception("Exception occurred while fetching external links.")
         return []
 
@@ -883,7 +811,7 @@ async def get_resolved_linkedin_links(url: str) -> List[str]:
     logger.debug(f"Entering get_resolved_linkedin_links for URL: {url}")
     try:
         content = await fetch_html_content(url)
-    except Exception
+    except Exception:
         logger.exception("Exception occurred while fetching HTML content.")
         return []
 
@@ -907,7 +835,7 @@ async def get_company_website_from_linkedin_url(linkedin_url: str) -> str:
 
     try:
         links = await get_external_links(linkedin_url)
-    except Exception
+    except Exception:
         logger.exception("Exception occurred while getting external links for LinkedIn URL.")
         return ""
 