dhisana 0.0.1.dev85__py3-none-any.whl → 0.0.1.dev236__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dhisana/schemas/common.py +33 -0
- dhisana/schemas/sales.py +224 -23
- dhisana/utils/add_mapping.py +72 -63
- dhisana/utils/apollo_tools.py +739 -109
- dhisana/utils/built_with_api_tools.py +4 -2
- dhisana/utils/cache_output_tools.py +23 -23
- dhisana/utils/check_email_validity_tools.py +456 -458
- dhisana/utils/check_for_intent_signal.py +1 -2
- dhisana/utils/check_linkedin_url_validity.py +34 -8
- dhisana/utils/clay_tools.py +3 -2
- dhisana/utils/clean_properties.py +3 -1
- dhisana/utils/compose_salesnav_query.py +0 -1
- dhisana/utils/compose_search_query.py +7 -3
- dhisana/utils/composite_tools.py +0 -1
- dhisana/utils/dataframe_tools.py +2 -2
- dhisana/utils/email_body_utils.py +72 -0
- dhisana/utils/email_provider.py +375 -0
- dhisana/utils/enrich_lead_information.py +585 -85
- dhisana/utils/fetch_openai_config.py +129 -0
- dhisana/utils/field_validators.py +1 -1
- dhisana/utils/g2_tools.py +0 -1
- dhisana/utils/generate_content.py +0 -1
- dhisana/utils/generate_email.py +69 -16
- dhisana/utils/generate_email_response.py +298 -41
- dhisana/utils/generate_flow.py +0 -1
- dhisana/utils/generate_linkedin_connect_message.py +19 -6
- dhisana/utils/generate_linkedin_response_message.py +156 -65
- dhisana/utils/generate_structured_output_internal.py +351 -131
- dhisana/utils/google_custom_search.py +150 -44
- dhisana/utils/google_oauth_tools.py +721 -0
- dhisana/utils/google_workspace_tools.py +391 -25
- dhisana/utils/hubspot_clearbit.py +3 -1
- dhisana/utils/hubspot_crm_tools.py +771 -167
- dhisana/utils/instantly_tools.py +3 -1
- dhisana/utils/lusha_tools.py +10 -7
- dhisana/utils/mailgun_tools.py +150 -0
- dhisana/utils/microsoft365_tools.py +447 -0
- dhisana/utils/openai_assistant_and_file_utils.py +121 -177
- dhisana/utils/openai_helpers.py +19 -16
- dhisana/utils/parse_linkedin_messages_txt.py +2 -3
- dhisana/utils/profile.py +37 -0
- dhisana/utils/proxy_curl_tools.py +507 -206
- dhisana/utils/proxycurl_search_leads.py +426 -0
- dhisana/utils/research_lead.py +121 -68
- dhisana/utils/sales_navigator_crawler.py +1 -6
- dhisana/utils/salesforce_crm_tools.py +323 -50
- dhisana/utils/search_router.py +131 -0
- dhisana/utils/search_router_jobs.py +51 -0
- dhisana/utils/sendgrid_tools.py +126 -91
- dhisana/utils/serarch_router_local_business.py +75 -0
- dhisana/utils/serpapi_additional_tools.py +290 -0
- dhisana/utils/serpapi_google_jobs.py +117 -0
- dhisana/utils/serpapi_google_search.py +188 -0
- dhisana/utils/serpapi_local_business_search.py +129 -0
- dhisana/utils/serpapi_search_tools.py +363 -432
- dhisana/utils/serperdev_google_jobs.py +125 -0
- dhisana/utils/serperdev_local_business.py +154 -0
- dhisana/utils/serperdev_search.py +233 -0
- dhisana/utils/smtp_email_tools.py +576 -0
- dhisana/utils/test_connect.py +1765 -92
- dhisana/utils/trasform_json.py +95 -16
- dhisana/utils/web_download_parse_tools.py +0 -1
- dhisana/utils/zoominfo_tools.py +2 -3
- dhisana/workflow/test.py +1 -1
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/METADATA +5 -2
- dhisana-0.0.1.dev236.dist-info/RECORD +100 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/WHEEL +1 -1
- dhisana-0.0.1.dev85.dist-info/RECORD +0 -81
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/entry_points.txt +0 -0
- {dhisana-0.0.1.dev85.dist-info → dhisana-0.0.1.dev236.dist-info}/top_level.txt +0 -0
dhisana/utils/serpapi_google_jobs.py
@@ -0,0 +1,117 @@
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+import aiohttp
+
+from dhisana.utils.assistant_tool_tag import assistant_tool
+from dhisana.utils.cache_output_tools import cache_output, retrieve_output
+from dhisana.utils.serpapi_google_search import get_serp_api_access_token
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def _normalise_job_result(raw: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert a SerpApi jobs result to a simplified schema."""
+    # Pick an apply link out of apply_options, which SerpApi returns as a list or a dict.
+    apply_link = ""
+    apply_options = raw.get("apply_options") or raw.get("apply_links") or []
+    if isinstance(apply_options, list) and apply_options:
+        first = apply_options[0]
+        if isinstance(first, dict):
+            apply_link = first.get("link") or first.get("apply_link") or ""
+    if isinstance(apply_options, dict):
+        apply_link = apply_options.get("link") or apply_options.get("apply_link") or ""
+
+    return {
+        "job_title": raw.get("title", ""),
+        "organization_name": raw.get("company_name") or raw.get("company", ""),
+        "location": raw.get("location", ""),
+        "via": raw.get("via", ""),
+        "description": raw.get("description", ""),
+        "job_posting_url": raw.get("job_highlight_url")
+        or raw.get("apply_link")
+        or apply_link
+        or raw.get("link", ""),
+    }
+
+
+@assistant_tool
+async def search_google_jobs_serpapi(
+    query: str,
+    number_of_results: int = 10,
+    offset: int = 0,
+    tool_config: Optional[List[Dict]] = None,
+    location: Optional[str] = None,
+) -> List[str]:
+    """Search Google Jobs via SerpApi and return normalised JSON strings."""
+    if not query:
+        logger.warning("Empty query provided to search_google_jobs_serpapi")
+        return []
+
+    cache_key = f"jobs_serpapi_{query}_{number_of_results}_{offset}_{location or ''}"
+    cached = retrieve_output("search_google_jobs_serpapi", cache_key)
+    if cached is not None:
+        return cached
+
+    api_key = get_serp_api_access_token(tool_config)
+    page_size = 10
+    collected: List[Dict[str, Any]] = []
+    next_page_token = None
+
+    async with aiohttp.ClientSession() as session:
+        while len(collected) < number_of_results:
+            to_fetch = min(page_size, number_of_results - len(collected))
+            params = {
+                "engine": "google_jobs",
+                "q": query,
+                "api_key": api_key,
+                "num": to_fetch,
+            }
+
+            if offset > 0 and next_page_token is None:
+                # If offset is provided but we don't have a token yet,
+                # we can't properly paginate. Log a warning.
+                logger.warning("Offset provided but next_page_token not available. Results may be incomplete.")
+
+            if next_page_token:
+                params["next_page_token"] = next_page_token
+
+            if location:
+                params["location"] = location
+
+            try:
+                async with session.get("https://serpapi.com/search", params=params) as resp:
+                    if resp.status != 200:
+                        try:
+                            err = await resp.json()
+                        except Exception:
+                            err = await resp.text()
+                        logger.warning("SerpApi jobs error: %s", err)
+                        return [json.dumps({"error": err})]
+                    payload = await resp.json()
+            except Exception as exc:
+                logger.exception("SerpApi jobs request failed")
+                return [json.dumps({"error": str(exc)})]
+
+            jobs = payload.get("jobs_results", [])
+            if not jobs:
+                break
+
+            collected.extend(jobs)
+
+            # Get the next_page_token for pagination
+            next_page_token = payload.get("serpapi_pagination", {}).get("next_page_token")
+            if not next_page_token:
+                break
+
+            # If we've collected enough results, stop pagination
+            if len(collected) >= number_of_results:
+                break
+
+    normalised = [_normalise_job_result(j) for j in collected[:number_of_results]]
+    serialised = [json.dumps(item) for item in normalised]
+    cache_output("search_google_jobs_serpapi", cache_key, serialised)
+    logger.info("Returned %d job results for '%s'", len(serialised), query)
+    return serialised
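A minimal usage sketch for the new jobs helper, assuming the module path above; the tool_config shape mirrors what get_serp_api_access_token (added in dhisana/utils/serpapi_google_search.py below) reads, and the API key, query, and location values are placeholders.

import asyncio
from dhisana.utils.serpapi_google_jobs import search_google_jobs_serpapi

# Placeholder credentials; the structure follows the "serpapi" entry that
# get_serp_api_access_token expects: {"name": ..., "configuration": [{"name", "value"}]}.
tool_config = [
    {"name": "serpapi", "configuration": [{"name": "apiKey", "value": "YOUR_SERPAPI_KEY"}]}
]

async def main() -> None:
    jobs = await search_google_jobs_serpapi(
        query="data engineer",
        number_of_results=5,
        tool_config=tool_config,
        location="United States",
    )
    for raw in jobs:  # each entry is a JSON string in the normalised job schema
        print(raw)

asyncio.run(main())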
dhisana/utils/serpapi_google_search.py
@@ -0,0 +1,188 @@
+import json
+import os
+from typing import Dict, List, Optional
+import aiohttp
+
+from dhisana.utils.assistant_tool_tag import assistant_tool
+from dhisana.utils.cache_output_tools import cache_output, retrieve_output
+
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def get_serp_api_access_token(tool_config: Optional[List[Dict]] = None) -> str:
+    """
+    Retrieves the SERPAPI_KEY access token from the provided tool configuration.
+
+    Args:
+        tool_config (list): A list of dictionaries containing the tool configuration.
+            Each dictionary should have a "name" key and a "configuration" key,
+            where "configuration" is a list of dictionaries containing "name" and "value" keys.
+
+    Returns:
+        str: The SERPAPI_KEY access token.
+
+    Raises:
+        ValueError: If the SerpAPI integration has not been configured.
+    """
+    logger.info("Entering get_serp_api_access_token")
+    SERPAPI_KEY = None
+
+    if tool_config:
+        logger.debug(f"Tool config provided: {tool_config}")
+        serpapi_config = next(
+            (item for item in tool_config if item.get("name") == "serpapi"), None
+        )
+        if serpapi_config:
+            config_map = {
+                item["name"]: item["value"]
+                for item in serpapi_config.get("configuration", [])
+                if item
+            }
+            SERPAPI_KEY = config_map.get("apiKey")
+        else:
+            logger.warning("No 'serpapi' config item found in tool_config.")
+    else:
+        logger.debug("No tool_config provided or it's None.")
+
+    SERPAPI_KEY = SERPAPI_KEY or os.getenv("SERPAPI_KEY")
+    if not SERPAPI_KEY:
+        logger.error("SerpAPI integration is not configured.")
+        raise ValueError(
+            "SerpAPI integration is not configured. Please configure the connection to SerpAPI in Integrations."
+        )
+
+    logger.info("Retrieved SERPAPI_KEY successfully.")
+    return SERPAPI_KEY
+
+
+@assistant_tool
+async def search_google_serpai(
+    query: str,
+    number_of_results: int = 10,
+    offset: int = 0,
+    tool_config: Optional[List[Dict]] = None,
+    as_oq: Optional[str] = None,  # optional terms
+) -> List[str]:
+    """
+    Google search via SerpAPI that returns a *uniform* list of JSON strings.
+    Each item is guaranteed to contain a 'link' key, even when the result
+    originally came from image/news blocks.
+
+    Blocks handled:
+      • organic_results – keeps SerpAPI structure
+      • inline_images – maps source -> link
+      • news_results – already has link
+    """
+    logger.info("Entering search_google_serpai")
+    if not query:
+        logger.warning("Empty query string provided.")
+        return []
+
+    cache_key = f"{query}_{number_of_results}_{offset}_{as_oq or ''}"
+    if cached := retrieve_output("search_google_serp", cache_key):
+        logger.info("Cache hit for search_google_serp.")
+        return cached
+
+    SERPAPI_KEY = get_serp_api_access_token(tool_config)
+    base_url = "https://serpapi.com/search"
+
+    page_size = number_of_results
+    start_index = 0 if offset == 0 else offset + 1  # SerpAPI pagination: the `start` parameter is the zero-indexed first result
+    all_items: list[dict] = []
+    seen_links: set[str] = set()  # dedupe across blocks/pages
+
+    # ------------------------------------------------------------------ #
+    # helpers                                                             #
+    # ------------------------------------------------------------------ #
+    def _extract_block_results(block: str, data: list[dict]) -> list[dict]:
+        """Return items from a given block in unified format (must include link)."""
+        mapped: list[dict] = []
+
+        if block == "organic_results":
+            for it in data:
+                link = it.get("link")
+                if link:
+                    mapped.append(it)  # keep original shape
+        elif block == "inline_images":
+            for it in data:
+                link = it.get("source")  # image-pack URL
+                if link:
+                    mapped.append({
+                        "title": it.get("title"),
+                        "link": link,
+                        "type": "inline_image",
+                        "source_name": it.get("source_name"),
+                        "thumbnail": it.get("thumbnail"),
+                    })
+        elif block == "news_results":
+            for it in data:
+                link = it.get("link")
+                if link:
+                    mapped.append(it)  # already fine
+        return mapped
+    # ------------------------------------------------------------------ #
+
+    async with aiohttp.ClientSession() as session:
+        while len(all_items) < number_of_results:
+            to_fetch = min(page_size, number_of_results - len(all_items))
+            params = {
+                "engine": "google",
+                "api_key": SERPAPI_KEY,
+                "q": query,
+                "num": to_fetch,
+                "start": start_index,
+                "location": "United States",
+            }
+            if as_oq:
+                params["as_oq"] = as_oq
+
+            logger.debug(f"SERP API GET → {params}")
+
+            try:
+                async with session.get(base_url, params=params) as resp:
+                    if resp.status != 200:
+                        try:
+                            err = await resp.json()
+                        except Exception:
+                            err = await resp.text()
+                        logger.warning(f"SerpAPI {resp.status=}: {err}")
+                        return [json.dumps({"error": err})]
+                    result = await resp.json()
+            except Exception as e:
+                logger.exception("SerpAPI request failed")
+                return [json.dumps({"error": str(e)})]
+
+            # ------------------ harvest every supported block ------------------
+            page_items: list[dict] = []
+            for block_name in ("organic_results", "inline_images", "news_results"):
+                data = result.get(block_name) or []
+                page_items.extend(_extract_block_results(block_name, data))
+
+            # dedupe & accumulate
+            new_added = 0
+            for it in page_items:
+                link = it["link"]
+                if link not in seen_links:
+                    seen_links.add(link)
+                    all_items.append(it)
+                    new_added += 1
+                    if len(all_items) >= number_of_results:
+                        break
+            logger.debug(f"Added {new_added} items (total={len(all_items)})")
+
+            # stop if Google gave us nothing new
+            if new_added == 0:
+                logger.debug("No more items returned; stopping.")
+                break
+
+            start_index += to_fetch  # next Google results page
+
+    # truncate and serialise
+    all_items = all_items[:number_of_results]
+    serialised = [json.dumps(it) for it in all_items]
+    cache_output("search_google_serp", cache_key, serialised)
+
+    logger.info(f"Returning {len(serialised)} items for '{query}'")
+    return serialised
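A minimal sketch of calling the plain Google search tool, assuming the module path above; when no tool_config is passed, get_serp_api_access_token falls back to the SERPAPI_KEY environment variable, so the exported value here is a placeholder.

import asyncio
import os
from dhisana.utils.serpapi_google_search import search_google_serpai

# Placeholder key; in practice SERPAPI_KEY would already be set in the environment
# or supplied through tool_config as in the jobs example above.
os.environ.setdefault("SERPAPI_KEY", "YOUR_SERPAPI_KEY")

async def main() -> None:
    items = await search_google_serpai(query="dhisana ai", number_of_results=5)
    for raw in items:  # every entry is a JSON string that carries a 'link' key
        print(raw)

asyncio.run(main())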
dhisana/utils/serpapi_local_business_search.py
@@ -0,0 +1,129 @@
+import json
+import os
+import logging
+from typing import Any, Dict, List, Optional
+
+import aiohttp
+
+from dhisana.utils.assistant_tool_tag import assistant_tool
+from dhisana.utils.cache_output_tools import cache_output, retrieve_output
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+# ──────────────────────────────────────────────────────────────────────────
+# Re-use the get_serp_api_access_token helper you already have.
+# ──────────────────────────────────────────────────────────────────────────
+def _normalise_local_result(raw: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Convert a single SerpApi `local_results` item to the standard format.
+    – Falls back to ''/None when fields are absent.
+    – Derives `google_maps_url` from the `links.directions` entry when present,
+      otherwise constructs a CID-based URL from data_cid / place_id.
+    """
+    # ── unpack links ──────────────────────────────────────────────────────
+    links = raw.get("links") or {}
+    if isinstance(links, list):  # older payloads: list of dicts
+        links = {x.get("type") or x.get("name"): x.get("link")
+                 for x in links if isinstance(x, dict)}
+
+    # ── compute Google Maps URL ───────────────────────────────────────────
+    cid = raw.get("data_cid") or raw.get("place_id")
+    google_maps_url = links.get("directions") or (f"https://maps.google.com/?cid={cid}" if cid else "")
+
+    # ── return unified schema ─────────────────────────────────────────────
+    return {
+        "full_name": raw.get("title", ""),
+        "organization_name": raw.get("title", ""),
+        "phone": raw.get("phone") or raw.get("phone_number") or "",
+        "organization_website": raw.get("website") or links.get("website") or "",
+        "rating": raw.get("rating"),
+        "reviews": raw.get("reviews"),
+        "address": raw.get("address", ""),
+        "google_maps_url": google_maps_url,
+    }
+
+
+@assistant_tool
+async def search_local_business_serpai(
+    query: str,
+    number_of_results: int = 20,
+    offset: int = 0,
+    tool_config: Optional[List[Dict]] = None,
+    location: Optional[str] = None,
+) -> List[str]:
+    """
+    Fetch Google Local results with SerpApi and return a list of businesses
+    normalised to Dhisana's local-business schema (serialized as JSON strings).
+
+    Args:
+        query: Search term (e.g. "coffee shops near me").
+        number_of_results: Total items desired.
+        offset: Result offset (multiples of 20 on desktop).
+        tool_config: Optional Dhisana tool-config blob holding the API key.
+        location: Optional human location string (e.g. "San Jose, CA").
+    """
+    if not query:
+        logger.warning("Empty query string provided.")
+        return []
+
+    # ── cache key
+    cache_key = f"local_{query}_{number_of_results}_{offset}_{location or ''}"
+    cached = retrieve_output("search_local_serp", cache_key)
+    if cached is not None:
+        return cached
+
+    # ── api key
+    from dhisana.utils.serpapi_google_search import get_serp_api_access_token
+    SERPAPI_KEY = get_serp_api_access_token(tool_config)
+
+    page_size = 20  # Google Local desktop page size
+    start_index = offset
+    collected: List[Dict[str, Any]] = []
+
+    async with aiohttp.ClientSession() as session:
+        while len(collected) < number_of_results:
+            to_fetch = min(page_size, number_of_results - len(collected))
+
+            params = {
+                "engine": "google_local",
+                "type": "search",
+                "q": query,
+                "api_key": SERPAPI_KEY,
+                "start": start_index,
+                "num": to_fetch,
+            }
+            if location:
+                params["location"] = location
+
+            logger.debug("SerpApi local request params: %s", params)
+
+            try:
+                async with session.get("https://serpapi.com/search", params=params) as resp:
+                    if resp.status != 200:
+                        try:
+                            err = await resp.json()
+                        except Exception:
+                            err = await resp.text()
+                        logger.warning("SerpApi error: %s", err)
+                        return [json.dumps({"error": err})]
+                    payload = await resp.json()
+            except Exception as exc:
+                logger.exception("Request failed.")
+                return [json.dumps({"error": str(exc)})]
+
+            local_results = payload.get("local_results", [])
+            if not local_results:
+                break
+
+            collected.extend(local_results)
+            start_index += to_fetch
+
+    # truncate & normalise
+    normalised = [_normalise_local_result(r) for r in collected[:number_of_results]]
+    serialised = [json.dumps(item) for item in normalised]
+
+    cache_output("search_local_serp", cache_key, serialised)
+    logger.info("Returned %d local businesses for '%s'", len(serialised), query)
+    return serialised
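And a matching sketch for the local-business tool, again assuming the module path above; the query, location, and printed fields are illustrative, and the SerpApi key is resolved from tool_config or the SERPAPI_KEY environment variable as in the previous examples.

import asyncio
import json
from dhisana.utils.serpapi_local_business_search import search_local_business_serpai

async def main() -> None:
    rows = await search_local_business_serpai(
        query="coffee shops",
        number_of_results=10,
        location="San Jose, CA",  # illustrative location
    )
    for raw in rows:
        business = json.loads(raw)  # entries use the normalised local-business schema
        print(business["organization_name"], business["google_maps_url"])

asyncio.run(main())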