pytrends-modern 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytrends_modern/__init__.py +27 -0
- pytrends_modern/cli.py +352 -0
- pytrends_modern/config.py +196 -0
- pytrends_modern/exceptions.py +68 -0
- pytrends_modern/py.typed +0 -0
- pytrends_modern/request.py +849 -0
- pytrends_modern/rss.py +337 -0
- pytrends_modern/utils.py +267 -0
- pytrends_modern-0.1.0.dist-info/METADATA +394 -0
- pytrends_modern-0.1.0.dist-info/RECORD +14 -0
- pytrends_modern-0.1.0.dist-info/WHEEL +5 -0
- pytrends_modern-0.1.0.dist-info/entry_points.txt +2 -0
- pytrends_modern-0.1.0.dist-info/licenses/LICENSE +37 -0
- pytrends_modern-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,849 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main Google Trends API request module
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import random
|
|
7
|
+
import time
|
|
8
|
+
from itertools import product
|
|
9
|
+
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
|
|
10
|
+
from urllib.parse import quote
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import requests
|
|
14
|
+
from requests.adapters import HTTPAdapter
|
|
15
|
+
from requests.packages.urllib3.util.retry import Retry
|
|
16
|
+
|
|
17
|
+
from pytrends_modern import exceptions
|
|
18
|
+
from pytrends_modern.config import (
|
|
19
|
+
BASE_TRENDS_URL,
|
|
20
|
+
CATEGORIES_URL,
|
|
21
|
+
DEFAULT_BACKOFF_FACTOR,
|
|
22
|
+
DEFAULT_GEO,
|
|
23
|
+
DEFAULT_HL,
|
|
24
|
+
DEFAULT_RETRIES,
|
|
25
|
+
DEFAULT_TIMEOUT,
|
|
26
|
+
DEFAULT_TZ,
|
|
27
|
+
ERROR_CODES,
|
|
28
|
+
GENERAL_URL,
|
|
29
|
+
INTEREST_BY_REGION_URL,
|
|
30
|
+
INTEREST_OVER_TIME_URL,
|
|
31
|
+
MULTIRANGE_INTEREST_OVER_TIME_URL,
|
|
32
|
+
REALTIME_TRENDING_SEARCHES_URL,
|
|
33
|
+
RELATED_QUERIES_URL,
|
|
34
|
+
SUGGESTIONS_URL,
|
|
35
|
+
TODAY_SEARCHES_URL,
|
|
36
|
+
TOP_CHARTS_URL,
|
|
37
|
+
TRENDING_SEARCHES_URL,
|
|
38
|
+
USER_AGENTS,
|
|
39
|
+
VALID_GPROP,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TrendReq:
    """
    Google Trends API - Enhanced version

    Features:
    - Full pytrends compatibility
    - Enhanced error handling with automatic retries
    - Proxy rotation support
    - User agent rotation
    - Better rate limit handling
    - Type hints throughout

    Example:
        >>> pytrends = TrendReq(hl='en-US', tz=360)
        >>> pytrends.build_payload(['Python', 'JavaScript'], timeframe='today 12-m')
        >>> df = pytrends.interest_over_time()
    """

    # HTTP verb identifiers accepted by _get_data()'s `method` argument.
    GET_METHOD = "get"
    POST_METHOD = "post"
|
|
63
|
+
|
|
64
|
+
def __init__(
|
|
65
|
+
self,
|
|
66
|
+
hl: str = DEFAULT_HL,
|
|
67
|
+
tz: int = DEFAULT_TZ,
|
|
68
|
+
geo: str = DEFAULT_GEO,
|
|
69
|
+
timeout: Tuple[int, int] = DEFAULT_TIMEOUT,
|
|
70
|
+
proxies: Optional[Union[List[str], Dict[str, str]]] = None,
|
|
71
|
+
retries: int = DEFAULT_RETRIES,
|
|
72
|
+
backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
|
|
73
|
+
requests_args: Optional[Dict[str, Any]] = None,
|
|
74
|
+
rotate_user_agent: bool = True,
|
|
75
|
+
):
|
|
76
|
+
"""
|
|
77
|
+
Initialize Google Trends API client
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
hl: Language code (e.g., 'en-US', 'es-ES')
|
|
81
|
+
tz: Timezone offset in minutes (e.g., 360 for US CST)
|
|
82
|
+
geo: Geographic location (e.g., 'US', 'GB', 'US-CA')
|
|
83
|
+
timeout: (connect_timeout, read_timeout) in seconds
|
|
84
|
+
proxies: List of proxy URLs or dict of proxies {'http': '...', 'https': '...'}
|
|
85
|
+
retries: Number of retry attempts
|
|
86
|
+
backoff_factor: Backoff factor for exponential backoff
|
|
87
|
+
requests_args: Additional arguments to pass to requests
|
|
88
|
+
rotate_user_agent: Whether to rotate user agents
|
|
89
|
+
"""
|
|
90
|
+
# Rate limit message from Google
|
|
91
|
+
self.google_rl = "You have reached your quota limit. Please try again later."
|
|
92
|
+
|
|
93
|
+
# Initialize instance variables
|
|
94
|
+
self.results: Optional[Dict] = None
|
|
95
|
+
self.tz = tz
|
|
96
|
+
self.hl = hl
|
|
97
|
+
self.geo = geo
|
|
98
|
+
self.kw_list: List[str] = []
|
|
99
|
+
self.timeout = timeout
|
|
100
|
+
self.retries = retries
|
|
101
|
+
self.backoff_factor = backoff_factor
|
|
102
|
+
self.rotate_user_agent = rotate_user_agent
|
|
103
|
+
self.requests_args = requests_args or {}
|
|
104
|
+
|
|
105
|
+
# Handle proxies
|
|
106
|
+
self.proxies: List[str] = []
|
|
107
|
+
self.proxy_index = 0
|
|
108
|
+
if proxies:
|
|
109
|
+
if isinstance(proxies, list):
|
|
110
|
+
self.proxies = proxies
|
|
111
|
+
elif isinstance(proxies, dict):
|
|
112
|
+
# Store dict format in requests_args
|
|
113
|
+
self.requests_args["proxies"] = proxies
|
|
114
|
+
|
|
115
|
+
# Get initial cookies
|
|
116
|
+
self.cookies = self._get_google_cookie()
|
|
117
|
+
|
|
118
|
+
# Initialize widget payloads
|
|
119
|
+
self.token_payload: Dict[str, Any] = {}
|
|
120
|
+
self.interest_over_time_widget: Dict[str, Any] = {}
|
|
121
|
+
self.interest_by_region_widget: Dict[str, Any] = {}
|
|
122
|
+
self.related_topics_widget_list: List[Dict[str, Any]] = []
|
|
123
|
+
self.related_queries_widget_list: List[Dict[str, Any]] = []
|
|
124
|
+
|
|
125
|
+
# Setup headers
|
|
126
|
+
self.headers = {"accept-language": self.hl}
|
|
127
|
+
if self.rotate_user_agent:
|
|
128
|
+
self.headers["User-Agent"] = random.choice(USER_AGENTS)
|
|
129
|
+
self.headers.update(self.requests_args.pop("headers", {}))
|
|
130
|
+
|
|
131
|
+
def _get_user_agent(self) -> str:
|
|
132
|
+
"""Get a random user agent"""
|
|
133
|
+
return random.choice(USER_AGENTS) if self.rotate_user_agent else USER_AGENTS[0]
|
|
134
|
+
|
|
135
|
+
    def _get_google_cookie(self) -> Dict[str, str]:
        """
        Get Google NID cookie for requests.

        Makes up to three attempts against the Trends front page, rotating
        out dead proxies on ProxyError. Falls back to an empty dict when no
        cookie could be obtained (some endpoints work without it).

        Returns:
            Dictionary containing the NID cookie, or {} on failure.

        Raises:
            exceptions.ResponseError: If all proxies fail (none left to try).
        """
        max_attempts = 3
        attempt = 0

        while attempt < max_attempts:
            try:
                # Copy so per-attempt proxy injection never mutates requests_args.
                kwargs = dict(self.requests_args)

                # Route through the currently selected proxy, if any.
                if self.proxies and len(self.proxies) > 0:
                    proxy = {"https": self.proxies[self.proxy_index]}
                    kwargs["proxies"] = proxy

                # Hit the Trends landing page just to collect cookies.
                # NOTE(review): geo is derived as hl[-2:] — assumes hl ends in a
                # 2-letter country code like 'en-US'; confirm for bare codes ('en').
                response = requests.get(
                    f"{BASE_TRENDS_URL}/?geo={self.hl[-2:]}",
                    timeout=self.timeout,
                    headers={"User-Agent": self._get_user_agent()},
                    **kwargs,
                )

                # Keep only the NID cookie from whatever Google set.
                cookies = dict(filter(lambda i: i[0] == "NID", response.cookies.items()))
                if cookies:
                    return cookies

            except requests.exceptions.ProxyError:
                print(f"[WARN] Proxy error with proxy {self.proxy_index}. Trying next...")
                if len(self.proxies) > 1:
                    # Drop the dead proxy and wrap the index if it ran off the end.
                    self.proxies.pop(self.proxy_index)
                    if self.proxy_index >= len(self.proxies):
                        self.proxy_index = 0
                else:
                    raise exceptions.ResponseError("No working proxies available")

            except Exception as e:
                print(f"[WARN] Error getting cookie (attempt {attempt + 1}/{max_attempts}): {e}")

            attempt += 1
            if attempt < max_attempts:
                time.sleep(1 * attempt)  # linearly increasing delay: 1s, then 2s

        # Return empty dict if all attempts fail (some endpoints work without cookies)
        print("[WARN] Could not get Google cookie, proceeding without it")
        return {}
|
|
190
|
+
|
|
191
|
+
def _get_new_proxy(self) -> None:
|
|
192
|
+
"""Rotate to next proxy in list"""
|
|
193
|
+
if len(self.proxies) > 0:
|
|
194
|
+
self.proxy_index = (self.proxy_index + 1) % len(self.proxies)
|
|
195
|
+
|
|
196
|
+
    def _get_data(
        self, url: str, method: str = GET_METHOD, trim_chars: int = 0, **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Send request to Google Trends and return JSON response.

        A fresh Session is created per call, with urllib3 Retry mounted when
        retries/backoff are configured, and the current proxy applied.

        Args:
            url: Target URL
            method: HTTP method ('get' or 'post')
            trim_chars: Number of characters to trim from response start
                (Google prefixes its JSON with a short garbage sequence)
            **kwargs: Additional arguments for request

        Returns:
            Parsed JSON response as dictionary

        Raises:
            exceptions.TooManyRequestsError: If rate limited (HTTP 429)
            exceptions.ResponseError: For other HTTP errors, transport
                failures, or unparseable JSON
        """
        # Create a throwaway session for this single request.
        session = requests.Session()

        # Mount a urllib3 Retry adapter when retries/backoff are configured.
        if self.retries > 0 or self.backoff_factor > 0:
            retry_strategy = Retry(
                total=self.retries,
                read=self.retries,
                connect=self.retries,
                backoff_factor=self.backoff_factor,
                status_forcelist=ERROR_CODES,
                allowed_methods=frozenset(["GET", "POST"]),
            )
            adapter = HTTPAdapter(max_retries=retry_strategy)
            session.mount("https://", adapter)
            session.mount("http://", adapter)

        # Apply the client-wide headers (accept-language, User-Agent, ...).
        session.headers.update(self.headers)

        # When proxying, refresh the cookie through the current proxy and
        # pin the session to it.
        if self.proxies and len(self.proxies) > 0:
            self.cookies = self._get_google_cookie()
            session.proxies.update({"https": self.proxies[self.proxy_index]})

        # NOTE(review): kwargs and self.requests_args are both splatted below;
        # if they ever share a key (e.g. 'params'), Python raises TypeError.
        try:
            if method == self.POST_METHOD:
                response = session.post(
                    url, timeout=self.timeout, cookies=self.cookies, **kwargs, **self.requests_args
                )
            else:
                response = session.get(
                    url, timeout=self.timeout, cookies=self.cookies, **kwargs, **self.requests_args
                )
        except requests.exceptions.RequestException as e:
            raise exceptions.ResponseError(f"Request failed: {str(e)}")

        # Explicit rate-limit signal from Google.
        if response.status_code == 429:
            raise exceptions.TooManyRequestsError.from_response(response)

        # Only parse bodies that declare a JSON-ish content type.
        content_type = response.headers.get("Content-Type", "")
        if response.status_code == 200 and any(
            t in content_type
            for t in ["application/json", "application/javascript", "text/javascript"]
        ):
            # Trim Google's non-JSON prefix, then parse.
            content = response.text[trim_chars:]
            try:
                data = json.loads(content)
                self._get_new_proxy()  # rotate proxy only after a success
                return data
            except json.JSONDecodeError as e:
                raise exceptions.ResponseError(f"Invalid JSON response: {str(e)}")

        # Anything else is an error response.
        raise exceptions.ResponseError.from_response(response)
|
|
274
|
+
|
|
275
|
+
def build_payload(
|
|
276
|
+
self,
|
|
277
|
+
kw_list: List[str],
|
|
278
|
+
cat: int = 0,
|
|
279
|
+
timeframe: Union[str, List[str]] = "today 5-y",
|
|
280
|
+
geo: str = "",
|
|
281
|
+
gprop: str = "",
|
|
282
|
+
) -> None:
|
|
283
|
+
"""
|
|
284
|
+
Build payload for Google Trends request
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
kw_list: List of keywords (max 5)
|
|
288
|
+
cat: Category ID (0 for all categories)
|
|
289
|
+
timeframe: Time range (e.g., 'today 12-m', 'now 7-d')
|
|
290
|
+
Can be list for multirange queries
|
|
291
|
+
geo: Geographic location (e.g., 'US', 'GB', 'US-CA')
|
|
292
|
+
gprop: Google property ('', 'images', 'news', 'youtube', 'froogle')
|
|
293
|
+
|
|
294
|
+
Raises:
|
|
295
|
+
ValueError: If parameters are invalid
|
|
296
|
+
"""
|
|
297
|
+
# Validate gprop
|
|
298
|
+
if gprop not in VALID_GPROP:
|
|
299
|
+
raise ValueError(f"gprop must be one of {VALID_GPROP}, got '{gprop}'")
|
|
300
|
+
|
|
301
|
+
# Validate keyword count
|
|
302
|
+
if len(kw_list) > 5:
|
|
303
|
+
raise ValueError("Maximum 5 keywords allowed")
|
|
304
|
+
|
|
305
|
+
if len(kw_list) == 0:
|
|
306
|
+
raise ValueError("At least one keyword required")
|
|
307
|
+
|
|
308
|
+
# Store keywords and geo
|
|
309
|
+
self.kw_list = kw_list
|
|
310
|
+
self.geo = geo or self.geo
|
|
311
|
+
|
|
312
|
+
# Build token payload
|
|
313
|
+
self.token_payload = {
|
|
314
|
+
"hl": self.hl,
|
|
315
|
+
"tz": self.tz,
|
|
316
|
+
"req": {"comparisonItem": [], "category": cat, "property": gprop},
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
# Handle multiple geos
|
|
320
|
+
geo_list = self.geo if isinstance(self.geo, list) else [self.geo]
|
|
321
|
+
|
|
322
|
+
# Build comparison items
|
|
323
|
+
if isinstance(timeframe, list):
|
|
324
|
+
# Multirange: each keyword-geo pair gets its own timeframe
|
|
325
|
+
for index, (kw, g) in enumerate(product(self.kw_list, geo_list)):
|
|
326
|
+
keyword_payload = {"keyword": kw, "time": timeframe[index], "geo": g}
|
|
327
|
+
self.token_payload["req"]["comparisonItem"].append(keyword_payload)
|
|
328
|
+
else:
|
|
329
|
+
# Single timeframe for all keyword-geo pairs
|
|
330
|
+
for kw, g in product(self.kw_list, geo_list):
|
|
331
|
+
keyword_payload = {"keyword": kw, "time": timeframe, "geo": g}
|
|
332
|
+
self.token_payload["req"]["comparisonItem"].append(keyword_payload)
|
|
333
|
+
|
|
334
|
+
# Convert req to JSON string (required by Google's API)
|
|
335
|
+
self.token_payload["req"] = json.dumps(self.token_payload["req"])
|
|
336
|
+
|
|
337
|
+
# Get tokens from Google
|
|
338
|
+
self._get_tokens()
|
|
339
|
+
|
|
340
|
+
def _get_tokens(self) -> None:
|
|
341
|
+
"""
|
|
342
|
+
Get API tokens from Google Trends for different widget types
|
|
343
|
+
|
|
344
|
+
This method must be called after build_payload() to retrieve
|
|
345
|
+
the necessary tokens for subsequent API calls.
|
|
346
|
+
"""
|
|
347
|
+
# Make request to get widget configurations
|
|
348
|
+
widget_dicts = self._get_data(
|
|
349
|
+
url=GENERAL_URL,
|
|
350
|
+
method=self.POST_METHOD,
|
|
351
|
+
params=self.token_payload,
|
|
352
|
+
trim_chars=4,
|
|
353
|
+
)["widgets"]
|
|
354
|
+
|
|
355
|
+
# Clear previous widget lists
|
|
356
|
+
self.related_queries_widget_list.clear()
|
|
357
|
+
self.related_topics_widget_list.clear()
|
|
358
|
+
|
|
359
|
+
# Parse widgets
|
|
360
|
+
first_region_token = True
|
|
361
|
+
for widget in widget_dicts:
|
|
362
|
+
widget_id = widget.get("id", "")
|
|
363
|
+
|
|
364
|
+
if widget_id == "TIMESERIES":
|
|
365
|
+
self.interest_over_time_widget = widget
|
|
366
|
+
elif widget_id == "GEO_MAP" and first_region_token:
|
|
367
|
+
self.interest_by_region_widget = widget
|
|
368
|
+
first_region_token = False
|
|
369
|
+
elif "RELATED_TOPICS" in widget_id:
|
|
370
|
+
self.related_topics_widget_list.append(widget)
|
|
371
|
+
elif "RELATED_QUERIES" in widget_id:
|
|
372
|
+
self.related_queries_widget_list.append(widget)
|
|
373
|
+
|
|
374
|
+
    def interest_over_time(self) -> pd.DataFrame:
        """
        Get interest over time data.

        Returns:
            DataFrame with date index and columns for each keyword.
            Includes 'isPartial' column indicating if latest data is partial.
            With multiple geos the columns become a (keyword, region)
            MultiIndex.

        Raises:
            exceptions.ResponseError: If build_payload() has not been called.

        Example:
            >>> pytrends.build_payload(['Python'], timeframe='today 12-m')
            >>> df = pytrends.interest_over_time()
            >>> print(df.head())
        """
        if not self.interest_over_time_widget:
            raise exceptions.ResponseError(
                "No interest over time widget available. Call build_payload() first."
            )

        # Build request payload; 'req' must be a JSON-encoded string.
        payload = {
            "req": json.dumps(self.interest_over_time_widget["request"]),
            "token": self.interest_over_time_widget["token"],
            "tz": self.tz,
        }

        # Fetch; the first 5 response characters are a non-JSON prefix.
        req_json = self._get_data(
            url=INTEREST_OVER_TIME_URL,
            method=self.GET_METHOD,
            trim_chars=5,
            params=payload,
        )

        # Parse response
        df = pd.DataFrame(req_json["default"]["timelineData"])

        if df.empty:
            return df

        # Convert epoch-second timestamps to a sorted datetime index.
        df["date"] = pd.to_datetime(df["time"].astype("float64"), unit="s")
        df = df.set_index("date").sort_index()

        # Each 'value' cell is a list with one entry per keyword/geo pair;
        # split the stringified list into one positional column per pair.
        result_df = df["value"].apply(
            lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
        )

        # Replace positional columns with keyword (or (keyword, geo)) names.
        geo_list = self.geo if isinstance(self.geo, list) else [self.geo]
        for idx, (kw, g) in enumerate(product(self.kw_list, geo_list)):
            name = kw if len(geo_list) == 1 else (kw, g)
            result_df.insert(len(result_df.columns), name, result_df[idx].astype("int"))
            del result_df[idx]

        # Add isPartial column
        if "isPartial" in df:
            # NaN in isPartial means the data point is complete.
            df = df.fillna(False)
            is_partial_df = df["isPartial"].apply(
                lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
            )
            is_partial_df.columns = ["isPartial"]
            is_partial_df["isPartial"] = is_partial_df["isPartial"] == "True"
            final_df = pd.concat([result_df, is_partial_df], axis=1)
        else:
            final_df = result_df
            final_df["isPartial"] = False

        # Multiple geos: switch to a (keyword, region) column MultiIndex.
        if len(geo_list) > 1:
            final_df.columns = pd.MultiIndex.from_tuples(
                [c if isinstance(c, tuple) else (c,) for c in final_df], names=["keyword", "region"]
            )

        return final_df
|
|
449
|
+
|
|
450
|
+
    def interest_by_region(
        self,
        resolution: Literal["COUNTRY", "REGION", "CITY", "DMA"] = "COUNTRY",
        inc_low_vol: bool = False,
        inc_geo_code: bool = False,
    ) -> pd.DataFrame:
        """
        Get interest by geographic region.

        Args:
            resolution: Geographic resolution level
            inc_low_vol: Include regions with low search volume
            inc_geo_code: Include geographic codes in output

        Returns:
            DataFrame with geographic distribution of search interest

        Raises:
            exceptions.ResponseError: If build_payload() has not been called.

        Example:
            >>> pytrends.build_payload(['Python'], geo='US')
            >>> df = pytrends.interest_by_region(resolution='REGION')
            >>> print(df.head())
        """
        if not self.interest_by_region_widget:
            raise exceptions.ResponseError(
                "No interest by region widget available. Call build_payload() first."
            )

        # Override the widget's resolution only for worldwide queries or for
        # US sub-national resolutions; otherwise keep Google's default.
        if self.geo == "" or (self.geo == "US" and resolution in ["DMA", "CITY", "REGION"]):
            self.interest_by_region_widget["request"]["resolution"] = resolution

        self.interest_by_region_widget["request"]["includeLowSearchVolumeGeos"] = inc_low_vol

        # Build payload; 'req' must be a JSON-encoded string.
        payload = {
            "req": json.dumps(self.interest_by_region_widget["request"]),
            "token": self.interest_by_region_widget["token"],
            "tz": self.tz,
        }

        # Fetch; the first 5 response characters are a non-JSON prefix.
        req_json = self._get_data(
            url=INTEREST_BY_REGION_URL,
            method=self.GET_METHOD,
            trim_chars=5,
            params=payload,
        )

        # Parse response
        df = pd.DataFrame(req_json["default"]["geoMapData"])

        if df.empty:
            return df

        # Responses carry either 'geoCode' or 'coordinates' as the geo column.
        geo_column = "geoCode" if "geoCode" in df.columns else "coordinates"
        columns = ["geoName", geo_column, "value"]
        df = df[columns].set_index("geoName").sort_index()

        # Each 'value' cell is a list with one entry per keyword; split the
        # stringified list into positional columns.
        result_df = df["value"].apply(
            lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
        )

        # Add geo code if requested
        if inc_geo_code and geo_column in df.columns:
            result_df[geo_column] = df[geo_column]

        # Replace positional columns with keyword names.
        for idx, kw in enumerate(self.kw_list):
            result_df[kw] = result_df[idx].astype("int")
            del result_df[idx]

        return result_df
|
|
524
|
+
|
|
525
|
+
def related_topics(self) -> Dict[str, Dict[str, Optional[pd.DataFrame]]]:
|
|
526
|
+
"""
|
|
527
|
+
Get related topics for each keyword
|
|
528
|
+
|
|
529
|
+
Returns:
|
|
530
|
+
Dictionary with keywords as keys, each containing:
|
|
531
|
+
- 'top': DataFrame of top related topics
|
|
532
|
+
- 'rising': DataFrame of rising related topics
|
|
533
|
+
|
|
534
|
+
Example:
|
|
535
|
+
>>> pytrends.build_payload(['Python'])
|
|
536
|
+
>>> topics = pytrends.related_topics()
|
|
537
|
+
>>> print(topics['Python']['top'].head())
|
|
538
|
+
"""
|
|
539
|
+
if not self.related_topics_widget_list:
|
|
540
|
+
raise exceptions.ResponseError(
|
|
541
|
+
"No related topics widgets available. Call build_payload() first."
|
|
542
|
+
)
|
|
543
|
+
|
|
544
|
+
result_dict = {}
|
|
545
|
+
|
|
546
|
+
for widget in self.related_topics_widget_list:
|
|
547
|
+
# Extract keyword
|
|
548
|
+
try:
|
|
549
|
+
kw = widget["request"]["restriction"]["complexKeywordsRestriction"]["keyword"][0][
|
|
550
|
+
"value"
|
|
551
|
+
]
|
|
552
|
+
except (KeyError, IndexError):
|
|
553
|
+
kw = ""
|
|
554
|
+
|
|
555
|
+
# Build payload
|
|
556
|
+
payload = {
|
|
557
|
+
"req": json.dumps(widget["request"]),
|
|
558
|
+
"token": widget["token"],
|
|
559
|
+
"tz": self.tz,
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
# Get data
|
|
563
|
+
req_json = self._get_data(
|
|
564
|
+
url=RELATED_QUERIES_URL,
|
|
565
|
+
method=self.GET_METHOD,
|
|
566
|
+
trim_chars=5,
|
|
567
|
+
params=payload,
|
|
568
|
+
)
|
|
569
|
+
|
|
570
|
+
# Parse top topics
|
|
571
|
+
try:
|
|
572
|
+
top_list = req_json["default"]["rankedList"][0]["rankedKeyword"]
|
|
573
|
+
df_top = pd.json_normalize(top_list, sep="_")
|
|
574
|
+
except (KeyError, IndexError):
|
|
575
|
+
df_top = None
|
|
576
|
+
|
|
577
|
+
# Parse rising topics
|
|
578
|
+
try:
|
|
579
|
+
rising_list = req_json["default"]["rankedList"][1]["rankedKeyword"]
|
|
580
|
+
df_rising = pd.json_normalize(rising_list, sep="_")
|
|
581
|
+
except (KeyError, IndexError):
|
|
582
|
+
df_rising = None
|
|
583
|
+
|
|
584
|
+
result_dict[kw] = {"top": df_top, "rising": df_rising}
|
|
585
|
+
|
|
586
|
+
return result_dict
|
|
587
|
+
|
|
588
|
+
def related_queries(self) -> Dict[str, Dict[str, Optional[pd.DataFrame]]]:
|
|
589
|
+
"""
|
|
590
|
+
Get related search queries for each keyword
|
|
591
|
+
|
|
592
|
+
Returns:
|
|
593
|
+
Dictionary with keywords as keys, each containing:
|
|
594
|
+
- 'top': DataFrame of top related queries
|
|
595
|
+
- 'rising': DataFrame of rising related queries
|
|
596
|
+
|
|
597
|
+
Example:
|
|
598
|
+
>>> pytrends.build_payload(['Python'])
|
|
599
|
+
>>> queries = pytrends.related_queries()
|
|
600
|
+
>>> print(queries['Python']['top'].head())
|
|
601
|
+
"""
|
|
602
|
+
if not self.related_queries_widget_list:
|
|
603
|
+
raise exceptions.ResponseError(
|
|
604
|
+
"No related queries widgets available. Call build_payload() first."
|
|
605
|
+
)
|
|
606
|
+
|
|
607
|
+
result_dict = {}
|
|
608
|
+
|
|
609
|
+
for widget in self.related_queries_widget_list:
|
|
610
|
+
# Extract keyword
|
|
611
|
+
try:
|
|
612
|
+
kw = widget["request"]["restriction"]["complexKeywordsRestriction"]["keyword"][0][
|
|
613
|
+
"value"
|
|
614
|
+
]
|
|
615
|
+
except (KeyError, IndexError):
|
|
616
|
+
kw = ""
|
|
617
|
+
|
|
618
|
+
# Build payload
|
|
619
|
+
payload = {
|
|
620
|
+
"req": json.dumps(widget["request"]),
|
|
621
|
+
"token": widget["token"],
|
|
622
|
+
"tz": self.tz,
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
# Get data
|
|
626
|
+
req_json = self._get_data(
|
|
627
|
+
url=RELATED_QUERIES_URL,
|
|
628
|
+
method=self.GET_METHOD,
|
|
629
|
+
trim_chars=5,
|
|
630
|
+
params=payload,
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
# Parse top queries
|
|
634
|
+
try:
|
|
635
|
+
top_df = pd.DataFrame(req_json["default"]["rankedList"][0]["rankedKeyword"])
|
|
636
|
+
top_df = top_df[["query", "value"]]
|
|
637
|
+
except (KeyError, IndexError):
|
|
638
|
+
top_df = None
|
|
639
|
+
|
|
640
|
+
# Parse rising queries
|
|
641
|
+
try:
|
|
642
|
+
rising_df = pd.DataFrame(req_json["default"]["rankedList"][1]["rankedKeyword"])
|
|
643
|
+
rising_df = rising_df[["query", "value"]]
|
|
644
|
+
except (KeyError, IndexError):
|
|
645
|
+
rising_df = None
|
|
646
|
+
|
|
647
|
+
result_dict[kw] = {"top": top_df, "rising": rising_df}
|
|
648
|
+
|
|
649
|
+
return result_dict
|
|
650
|
+
|
|
651
|
+
def trending_searches(self, pn: str = "united_states") -> pd.DataFrame:
|
|
652
|
+
"""
|
|
653
|
+
Get trending searches for a country
|
|
654
|
+
|
|
655
|
+
Args:
|
|
656
|
+
pn: Country name (e.g., 'united_states', 'united_kingdom')
|
|
657
|
+
|
|
658
|
+
Returns:
|
|
659
|
+
DataFrame of trending searches
|
|
660
|
+
|
|
661
|
+
Example:
|
|
662
|
+
>>> pytrends = TrendReq()
|
|
663
|
+
>>> df = pytrends.trending_searches(pn='united_states')
|
|
664
|
+
>>> print(df.head())
|
|
665
|
+
"""
|
|
666
|
+
req_json = self._get_data(url=TRENDING_SEARCHES_URL, method=self.GET_METHOD)
|
|
667
|
+
|
|
668
|
+
if pn not in req_json:
|
|
669
|
+
raise exceptions.InvalidParameterError(
|
|
670
|
+
f"Country '{pn}' not found. Available: {list(req_json.keys())}"
|
|
671
|
+
)
|
|
672
|
+
|
|
673
|
+
return pd.DataFrame(req_json[pn])
|
|
674
|
+
|
|
675
|
+
def today_searches(self, pn: str = "US") -> pd.DataFrame:
|
|
676
|
+
"""
|
|
677
|
+
Get today's trending searches (Daily Trends)
|
|
678
|
+
|
|
679
|
+
Args:
|
|
680
|
+
pn: Country code (e.g., 'US', 'GB')
|
|
681
|
+
|
|
682
|
+
Returns:
|
|
683
|
+
DataFrame of today's trending searches
|
|
684
|
+
|
|
685
|
+
Example:
|
|
686
|
+
>>> pytrends = TrendReq()
|
|
687
|
+
>>> df = pytrends.today_searches(pn='US')
|
|
688
|
+
>>> print(df.head())
|
|
689
|
+
"""
|
|
690
|
+
params = {"ns": 15, "geo": pn, "tz": "-180", "hl": self.hl}
|
|
691
|
+
|
|
692
|
+
req_json = self._get_data(
|
|
693
|
+
url=TODAY_SEARCHES_URL,
|
|
694
|
+
method=self.GET_METHOD,
|
|
695
|
+
trim_chars=5,
|
|
696
|
+
params=params,
|
|
697
|
+
)
|
|
698
|
+
|
|
699
|
+
try:
|
|
700
|
+
trends = req_json["default"]["trendingSearchesDays"][0]["trendingSearches"]
|
|
701
|
+
result_df = pd.DataFrame([trend["title"] for trend in trends])
|
|
702
|
+
return result_df.iloc[:, -1]
|
|
703
|
+
except (KeyError, IndexError):
|
|
704
|
+
return pd.DataFrame()
|
|
705
|
+
|
|
706
|
+
def realtime_trending_searches(
|
|
707
|
+
self, pn: str = "US", cat: str = "all", count: int = 300
|
|
708
|
+
) -> pd.DataFrame:
|
|
709
|
+
"""
|
|
710
|
+
Get real-time trending searches
|
|
711
|
+
|
|
712
|
+
Args:
|
|
713
|
+
pn: Country code (e.g., 'US', 'GB')
|
|
714
|
+
cat: Category ('all' or specific category)
|
|
715
|
+
count: Maximum number of results (max 300)
|
|
716
|
+
|
|
717
|
+
Returns:
|
|
718
|
+
DataFrame of real-time trending searches with entity names and titles
|
|
719
|
+
|
|
720
|
+
Example:
|
|
721
|
+
>>> pytrends = TrendReq()
|
|
722
|
+
>>> df = pytrends.realtime_trending_searches(pn='US', count=50)
|
|
723
|
+
>>> print(df.head())
|
|
724
|
+
"""
|
|
725
|
+
# Validate count
|
|
726
|
+
ri_value = min(count, 300)
|
|
727
|
+
rs_value = min(count - 1, 200)
|
|
728
|
+
|
|
729
|
+
params = {
|
|
730
|
+
"ns": 15,
|
|
731
|
+
"geo": pn,
|
|
732
|
+
"tz": "300",
|
|
733
|
+
"hl": self.hl,
|
|
734
|
+
"cat": cat,
|
|
735
|
+
"fi": "0",
|
|
736
|
+
"fs": "0",
|
|
737
|
+
"ri": ri_value,
|
|
738
|
+
"rs": rs_value,
|
|
739
|
+
"sort": 0,
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
req_json = self._get_data(
|
|
743
|
+
url=REALTIME_TRENDING_SEARCHES_URL,
|
|
744
|
+
method=self.GET_METHOD,
|
|
745
|
+
trim_chars=5,
|
|
746
|
+
params=params,
|
|
747
|
+
)
|
|
748
|
+
|
|
749
|
+
try:
|
|
750
|
+
trending_stories = req_json["storySummaries"]["trendingStories"]
|
|
751
|
+
|
|
752
|
+
# Extract only wanted keys
|
|
753
|
+
wanted_keys = ["entityNames", "title"]
|
|
754
|
+
filtered_data = [
|
|
755
|
+
{key: ts[key] for key in wanted_keys if key in ts} for ts in trending_stories
|
|
756
|
+
]
|
|
757
|
+
|
|
758
|
+
return pd.DataFrame(filtered_data)
|
|
759
|
+
except KeyError:
|
|
760
|
+
return pd.DataFrame()
|
|
761
|
+
|
|
762
|
+
def top_charts(
|
|
763
|
+
self, date: int, hl: str = "en-US", tz: int = 300, geo: str = "GLOBAL"
|
|
764
|
+
) -> Optional[pd.DataFrame]:
|
|
765
|
+
"""
|
|
766
|
+
Get top charts for a specific year
|
|
767
|
+
|
|
768
|
+
Args:
|
|
769
|
+
date: Year (e.g., 2023, 2024)
|
|
770
|
+
hl: Language
|
|
771
|
+
tz: Timezone offset
|
|
772
|
+
geo: Geographic location
|
|
773
|
+
|
|
774
|
+
Returns:
|
|
775
|
+
DataFrame of top charts, or None if not available
|
|
776
|
+
|
|
777
|
+
Example:
|
|
778
|
+
>>> pytrends = TrendReq()
|
|
779
|
+
>>> df = pytrends.top_charts(date=2024, geo='US')
|
|
780
|
+
>>> print(df.head())
|
|
781
|
+
"""
|
|
782
|
+
# Validate date
|
|
783
|
+
try:
|
|
784
|
+
date = int(date)
|
|
785
|
+
except (ValueError, TypeError):
|
|
786
|
+
raise ValueError("date must be a year in format YYYY")
|
|
787
|
+
|
|
788
|
+
params = {"hl": hl, "tz": tz, "date": date, "geo": geo, "isMobile": False}
|
|
789
|
+
|
|
790
|
+
req_json = self._get_data(
|
|
791
|
+
url=TOP_CHARTS_URL,
|
|
792
|
+
method=self.GET_METHOD,
|
|
793
|
+
trim_chars=5,
|
|
794
|
+
params=params,
|
|
795
|
+
)
|
|
796
|
+
|
|
797
|
+
try:
|
|
798
|
+
return pd.DataFrame(req_json["topCharts"][0]["listItems"])
|
|
799
|
+
except (KeyError, IndexError):
|
|
800
|
+
return None
|
|
801
|
+
|
|
802
|
+
def suggestions(self, keyword: str) -> List[Dict[str, Any]]:
|
|
803
|
+
"""
|
|
804
|
+
Get keyword suggestions from Google Trends autocomplete
|
|
805
|
+
|
|
806
|
+
Args:
|
|
807
|
+
keyword: Search keyword
|
|
808
|
+
|
|
809
|
+
Returns:
|
|
810
|
+
List of suggestion dictionaries
|
|
811
|
+
|
|
812
|
+
Example:
|
|
813
|
+
>>> pytrends = TrendReq()
|
|
814
|
+
>>> suggestions = pytrends.suggestions('python')
|
|
815
|
+
>>> for s in suggestions:
|
|
816
|
+
... print(s['title'])
|
|
817
|
+
"""
|
|
818
|
+
kw_param = quote(keyword)
|
|
819
|
+
params = {"hl": self.hl}
|
|
820
|
+
|
|
821
|
+
req_json = self._get_data(
|
|
822
|
+
url=SUGGESTIONS_URL + kw_param,
|
|
823
|
+
params=params,
|
|
824
|
+
method=self.GET_METHOD,
|
|
825
|
+
trim_chars=5,
|
|
826
|
+
)
|
|
827
|
+
|
|
828
|
+
return req_json.get("default", {}).get("topics", [])
|
|
829
|
+
|
|
830
|
+
def categories(self) -> Dict[str, Any]:
|
|
831
|
+
"""
|
|
832
|
+
Get available category data from Google Trends
|
|
833
|
+
|
|
834
|
+
Returns:
|
|
835
|
+
Dictionary of available categories
|
|
836
|
+
|
|
837
|
+
Example:
|
|
838
|
+
>>> pytrends = TrendReq()
|
|
839
|
+
>>> cats = pytrends.categories()
|
|
840
|
+
>>> print(cats)
|
|
841
|
+
"""
|
|
842
|
+
params = {"hl": self.hl}
|
|
843
|
+
|
|
844
|
+
return self._get_data(
|
|
845
|
+
url=CATEGORIES_URL,
|
|
846
|
+
params=params,
|
|
847
|
+
method=self.GET_METHOD,
|
|
848
|
+
trim_chars=5,
|
|
849
|
+
)
|