pytrends_modern-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytrends_modern/rss.py ADDED
@@ -0,0 +1,337 @@
+ """
+ RSS Feed module for fast real-time Google Trends data
+ """
+
+ import xml.etree.ElementTree as ET
+ from datetime import datetime
+ from typing import Any, Dict, List, Literal, Optional, Union
+
+ import pandas as pd
+ import requests
+
+ from pytrends_modern.config import COUNTRIES, US_STATES
+ from pytrends_modern.exceptions import DownloadError, InvalidParameterError
+
+ # Type aliases
+ OutputFormat = Literal["dict", "json", "csv", "dataframe"]
+
+
+ class TrendsRSS:
+     """
+     Google Trends RSS Feed API
+
+     Fast access to real-time trending searches with rich media.
+
+     Features:
+     - 0.2 second response time
+     - News articles and headlines
+     - Images for each trend
+     - Traffic volume data
+     - Multiple output formats
+
+     Example:
+         >>> rss = TrendsRSS()
+         >>> trends = rss.get_trends(geo='US')
+         >>> for trend in trends:
+         ...     print(f"{trend['title']}: {trend['traffic']}")
+     """
+
+     RSS_URL_TEMPLATE = "https://trends.google.com/trends/trendingsearches/daily/rss?geo={geo}"
+
+     def __init__(self, timeout: int = 10):
+         """
+         Initialize TrendsRSS client
+
+         Args:
+             timeout: Request timeout in seconds
+         """
+         self.timeout = timeout
+
+     def _validate_geo(self, geo: str) -> str:
+         """
+         Validate geographic parameter
+
+         Args:
+             geo: Country or US state code
+
+         Returns:
+             Validated geo code (uppercase)
+
+         Raises:
+             InvalidParameterError: If geo is invalid
+         """
+         geo = geo.upper()
+
+         if geo in COUNTRIES or geo in US_STATES:
+             return geo
+
+         # Suggest similar matches (check for an empty geo before indexing geo[0])
+         all_geos = list(COUNTRIES.keys()) + list(US_STATES.keys())
+         similar = [code for code in all_geos if geo and code.startswith(geo[0])][:5]
+
+         error_msg = f"Invalid geo code '{geo}'."
+         if similar:
+             error_msg += f" Did you mean: {', '.join(similar)}?"
+         error_msg += f"\n\nAvailable: {len(COUNTRIES)} countries, {len(US_STATES)} US states"
+         error_msg += "\nExamples: 'US', 'GB', 'CA', 'US-CA', 'US-NY'"
+
+         raise InvalidParameterError(error_msg)
+
+     def _parse_rss_feed(
+         self,
+         xml_content: str,
+         include_images: bool = True,
+         include_articles: bool = True,
+         max_articles_per_trend: int = 5,
+     ) -> List[Dict[str, Any]]:
+         """
+         Parse RSS XML feed into structured data
+
+         Args:
+             xml_content: Raw XML content
+             include_images: Include trend images
+             include_articles: Include news articles
+             max_articles_per_trend: Maximum articles per trend
+
+         Returns:
+             List of trend dictionaries
+         """
+         try:
+             root = ET.fromstring(xml_content)
+         except ET.ParseError as e:
+             raise DownloadError(f"Failed to parse RSS feed: {str(e)}")
+
+         trends = []
+
+         # Parse each item (trend)
+         for item in root.findall(".//item"):
+             trend_data: Dict[str, Any] = {}
+
+             # Basic info
+             trend_data["title"] = self._get_text(item, "title")
+             trend_data["description"] = self._get_text(item, "description")
+             trend_data["link"] = self._get_text(item, "link")
+             trend_data["pub_date"] = self._get_text(item, "pubDate")
+
+             # Parse pubDate to datetime
+             if trend_data["pub_date"]:
+                 try:
+                     trend_data["pub_date_datetime"] = datetime.strptime(
+                         trend_data["pub_date"], "%a, %d %b %Y %H:%M:%S %z"
+                     )
+                 except ValueError:
+                     trend_data["pub_date_datetime"] = None
+
+             # Traffic volume (from ht:approx_traffic namespace)
+             traffic_elem = item.find(".//{http://www.google.com/trends/hottrends}approx_traffic")
+             if traffic_elem is not None and traffic_elem.text:
+                 # Remove '+' and ',' from traffic string
+                 traffic_str = traffic_elem.text.replace("+", "").replace(",", "")
+                 try:
+                     trend_data["traffic"] = int(traffic_str)
+                 except ValueError:
+                     trend_data["traffic"] = traffic_elem.text
+             else:
+                 trend_data["traffic"] = None
+
+             # Image
+             if include_images:
+                 picture_elem = item.find(".//{http://www.google.com/trends/hottrends}picture")
+                 trend_data["picture"] = picture_elem.text if picture_elem is not None else None
+
+             # News articles
+             if include_articles:
+                 news_items = item.findall(".//{http://www.google.com/trends/hottrends}news_item")
+                 articles = []
+
+                 for news_item in news_items[:max_articles_per_trend]:
+                     article: Dict[str, Any] = {}
+
+                     # Article title
+                     title_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_title"
+                     )
+                     article["title"] = title_elem.text if title_elem is not None else None
+
+                     # Article URL
+                     url_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_url"
+                     )
+                     article["url"] = url_elem.text if url_elem is not None else None
+
+                     # Article snippet
+                     snippet_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_snippet"
+                     )
+                     article["snippet"] = snippet_elem.text if snippet_elem is not None else None
+
+                     # Article source
+                     source_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_source"
+                     )
+                     article["source"] = source_elem.text if source_elem is not None else None
+
+                     articles.append(article)
+
+                 trend_data["articles"] = articles
+                 trend_data["article_count"] = len(articles)
+
+             trends.append(trend_data)
+
+         return trends
+
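# Illustrative sketch, not part of rss.py: the element names queried above imply a feed
# shaped roughly like this hand-built sample. The XML below is an assumption for
# demonstration, not a verbatim Google feed.
sample = (
    '<rss><channel><item>'
    '<title>example query</title>'
    '<pubDate>Mon, 01 Jan 2024 00:00:00 -0800</pubDate>'
    '<ht:approx_traffic xmlns:ht="http://www.google.com/trends/hottrends">200,000+</ht:approx_traffic>'
    '<ht:news_item xmlns:ht="http://www.google.com/trends/hottrends">'
    '<ht:news_item_title>Example headline</ht:news_item_title>'
    '<ht:news_item_source>Example source</ht:news_item_source>'
    '</ht:news_item>'
    '</item></channel></rss>'
)
parsed = TrendsRSS()._parse_rss_feed(sample)
print(parsed[0]["title"], parsed[0]["traffic"], parsed[0]["article_count"])  # example query 200000 1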
+     def _get_text(self, element: ET.Element, tag: str) -> Optional[str]:
+         """Safely extract text from XML element"""
+         elem = element.find(tag)
+         return elem.text if elem is not None else None
+
+     def get_trends(
+         self,
+         geo: str = "US",
+         output_format: OutputFormat = "dict",
+         include_images: bool = True,
+         include_articles: bool = True,
+         max_articles_per_trend: int = 5,
+     ) -> Union[List[Dict], str, pd.DataFrame]:
+         """
+         Get trending searches from RSS feed
+
+         Args:
+             geo: Country or US state code (e.g., 'US', 'GB', 'US-CA')
+             output_format: Output format ('dict', 'json', 'csv', 'dataframe')
+             include_images: Include trend images
+             include_articles: Include news articles
+             max_articles_per_trend: Maximum articles per trend
+
+         Returns:
+             Trends data in specified format
+
+         Raises:
+             InvalidParameterError: If parameters are invalid
+             DownloadError: If download fails
+
+         Example:
+             >>> rss = TrendsRSS()
+             >>> trends = rss.get_trends(geo='US', output_format='dataframe')
+             >>> print(trends.head())
+         """
+         # Validate geo
+         geo = self._validate_geo(geo)
+
+         # Build URL
+         url = self.RSS_URL_TEMPLATE.format(geo=geo)
+
+         # Fetch RSS feed
+         try:
+             response = requests.get(url, timeout=self.timeout)
+             response.raise_for_status()
+         except requests.RequestException as e:
+             raise DownloadError(f"Failed to download RSS feed: {str(e)}")
+
+         # Parse feed
+         trends = self._parse_rss_feed(
+             response.text,
+             include_images=include_images,
+             include_articles=include_articles,
+             max_articles_per_trend=max_articles_per_trend,
+         )
+
+         # Format output
+         return self._format_output(trends, output_format)
+
+     def _format_output(
+         self, trends: List[Dict[str, Any]], output_format: OutputFormat
+     ) -> Union[List[Dict], str, pd.DataFrame]:
+         """
+         Format trends data to specified output format
+
+         Args:
+             trends: List of trend dictionaries
+             output_format: Desired output format
+
+         Returns:
+             Formatted data
+         """
+         if output_format == "dict":
+             return trends
+
+         elif output_format == "json":
+             import json
+
+             return json.dumps(trends, indent=2, default=str)
+
+         elif output_format == "dataframe":
+             # Flatten nested articles for DataFrame
+             flattened_trends = []
+             for trend in trends:
+                 flat_trend = {
+                     "title": trend.get("title"),
+                     "description": trend.get("description"),
+                     "link": trend.get("link"),
+                     "pub_date": trend.get("pub_date"),
+                     "traffic": trend.get("traffic"),
+                     "picture": trend.get("picture"),
+                     "article_count": trend.get("article_count", 0),
+                 }
+                 flattened_trends.append(flat_trend)
+
+             return pd.DataFrame(flattened_trends)
+
+         elif output_format == "csv":
+             # Convert to DataFrame then CSV
+             df = self._format_output(trends, "dataframe")
+             return df.to_csv(index=False)
+
+         else:
+             raise InvalidParameterError(
+                 f"Invalid output format '{output_format}'. "
+                 "Must be one of: 'dict', 'json', 'csv', 'dataframe'"
+             )
+
+     def get_available_geos(self) -> Dict[str, str]:
+         """
+         Get dictionary of available geographic locations
+
+         Returns:
+             Dictionary mapping geo codes to location names
+
+         Example:
+             >>> rss = TrendsRSS()
+             >>> geos = rss.get_available_geos()
+             >>> print(f"Available countries: {len([g for g in geos if '-' not in g])}")
+         """
+         return {**COUNTRIES, **US_STATES}
+
+     def get_trends_for_multiple_geos(
+         self, geos: List[str], output_format: OutputFormat = "dict", **kwargs: Any
+     ) -> Dict[str, Union[List[Dict], str, pd.DataFrame]]:
+         """
+         Get trends for multiple geographic locations
+
+         Args:
+             geos: List of geo codes
+             output_format: Output format for each geo
+             **kwargs: Additional arguments passed to get_trends()
+
+         Returns:
+             Dictionary mapping geo codes to their trends
+
+         Example:
+             >>> rss = TrendsRSS()
+             >>> trends = rss.get_trends_for_multiple_geos(
+             ...     geos=['US', 'GB', 'CA'],
+             ...     output_format='dataframe'
+             ... )
+             >>> for geo, df in trends.items():
+             ...     print(f"{geo}: {len(df)} trends")
+         """
+         results = {}
+
+         for geo in geos:
+             try:
+                 results[geo] = self.get_trends(geo=geo, output_format=output_format, **kwargs)
+             except Exception as e:
+                 print(f"[WARN] Failed to get trends for {geo}: {str(e)}")
+                 results[geo] = [] if output_format == "dict" else None
+
+         return results
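Taken together, the class above exposes a small surface: get_trends() for one region, get_trends_for_multiple_geos() for several, and get_available_geos() to discover valid codes. A minimal usage sketch (not part of the packaged file), assuming the wheel is installed and importable as pytrends_modern with network access to trends.google.com:

from pytrends_modern.rss import TrendsRSS
from pytrends_modern.exceptions import DownloadError, InvalidParameterError

try:
    rss = TrendsRSS(timeout=10)

    # List of dicts: title, traffic, pub_date, picture, articles, article_count, ...
    trends = rss.get_trends(geo="US", max_articles_per_trend=3)
    for trend in trends[:5]:
        print(trend["title"], trend["traffic"])

    # Flattened view (articles reduced to article_count) as a pandas DataFrame
    df = rss.get_trends(geo="GB", output_format="dataframe")
    print(df.head())

    # One request per geo; per-geo failures are reported and mapped to an empty result
    by_geo = rss.get_trends_for_multiple_geos(geos=["US", "GB", "CA"])
    print({geo: len(items) for geo, items in by_geo.items()})
except (InvalidParameterError, DownloadError) as exc:
    print(f"Trends request failed: {exc}")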
@@ -0,0 +1,267 @@
+ """
+ Utility functions for pytrends-modern
+ """
+
+ from datetime import date, datetime, timedelta
+ from typing import Optional, Tuple
+
+ import pandas as pd
+
+
+ def convert_dates_to_timeframe(start: date, stop: date) -> str:
+     """
+     Convert two dates to Google Trends timeframe string
+
+     Args:
+         start: Start date
+         stop: End date
+
+     Returns:
+         Timeframe string (e.g., "2023-01-01 2023-12-31")
+
+     Example:
+         >>> from datetime import date
+         >>> timeframe = convert_dates_to_timeframe(
+         ...     date(2023, 1, 1),
+         ...     date(2023, 12, 31)
+         ... )
+         >>> print(timeframe)
+         2023-01-01 2023-12-31
+     """
+     return f"{start.strftime('%Y-%m-%d')} {stop.strftime('%Y-%m-%d')}"
+
+
+ def parse_timeframe(timeframe: str) -> Optional[Tuple[datetime, datetime]]:
+     """
+     Parse a timeframe string to start and end dates
+
+     Args:
+         timeframe: Timeframe string (e.g., "today 12-m", "2023-01-01 2023-12-31")
+
+     Returns:
+         Tuple of (start_datetime, end_datetime) or None if relative timeframe
+
+     Example:
+         >>> dates = parse_timeframe("2023-01-01 2023-12-31")
+         >>> print(dates)
+         (datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 12, 31, 0, 0))
+     """
+     # Check if it's a date range
+     if " " in timeframe and not timeframe.startswith(("now", "today")):
+         parts = timeframe.split()
+         if len(parts) == 2:
+             try:
+                 start = datetime.strptime(parts[0], "%Y-%m-%d")
+                 end = datetime.strptime(parts[1], "%Y-%m-%d")
+                 return (start, end)
+             except ValueError:
+                 pass
+
+     return None
+
+
+ def validate_keywords(keywords: list) -> bool:
+     """
+     Validate keyword list
+
+     Args:
+         keywords: List of keywords
+
+     Returns:
+         True if valid
+
+     Raises:
+         ValueError: If keywords are invalid
+     """
+     if not keywords:
+         raise ValueError("At least one keyword is required")
+
+     if len(keywords) > 5:
+         raise ValueError("Maximum 5 keywords allowed")
+
+     for kw in keywords:
+         if not isinstance(kw, str):
+             raise ValueError(f"Keywords must be strings, got {type(kw)}")
+         if not kw.strip():
+             raise ValueError("Keywords cannot be empty")
+
+     return True
+
+
+ def normalize_geo_code(geo: str) -> str:
+     """
+     Normalize geographic code to uppercase
+
+     Args:
+         geo: Geographic code
+
+     Returns:
+         Uppercase geo code
+
+     Example:
+         >>> normalize_geo_code('us')
+         'US'
+         >>> normalize_geo_code('US-ca')
+         'US-CA'
+     """
+     return geo.upper()
+
+
+ def format_traffic_number(traffic: int) -> str:
+     """
+     Format traffic number with comma separators
+
+     Args:
+         traffic: Traffic count
+
+     Returns:
+         Formatted string (e.g., "1,000,000+")
+
+     Example:
+         >>> format_traffic_number(1000000)
+         '1,000,000+'
+     """
+     if traffic >= 1000000:
+         return f"{traffic:,}+"
+     elif traffic >= 1000:
+         return f"{traffic:,}"
+     else:
+         return str(traffic)
+
+
+ def merge_trends_data(dfs: list, how: str = "outer") -> pd.DataFrame:
+     """
+     Merge multiple trends DataFrames
+
+     Args:
+         dfs: List of DataFrames to merge
+         how: Merge method ('outer', 'inner', 'left', 'right')
+
+     Returns:
+         Merged DataFrame
+
+     Example:
+         >>> df1 = pytrends1.interest_over_time()
+         >>> df2 = pytrends2.interest_over_time()
+         >>> merged = merge_trends_data([df1, df2])
+     """
+     if not dfs:
+         return pd.DataFrame()
+
+     result = dfs[0]
+     for df in dfs[1:]:
+         result = pd.merge(
+             result, df, left_index=True, right_index=True, how=how, suffixes=("", "_dup")
+         )
+
+     return result
+
+
+ def calculate_trend_momentum(df: pd.DataFrame, keyword: str, window: int = 7) -> pd.Series:
+     """
+     Calculate momentum (rate of change) for a keyword's trend
+
+     Args:
+         df: DataFrame from interest_over_time()
+         keyword: Keyword column to analyze
+         window: Window size for rolling average
+
+     Returns:
+         Series with momentum values
+
+     Example:
+         >>> df = pytrends.interest_over_time()
+         >>> momentum = calculate_trend_momentum(df, 'Python', window=7)
+         >>> print(momentum.tail())
+     """
+     if keyword not in df.columns:
+         raise ValueError(f"Keyword '{keyword}' not found in DataFrame")
+
+     # Calculate rolling average
+     rolling_avg = df[keyword].rolling(window=window).mean()
+
+     # Calculate momentum (percent change)
+     momentum = rolling_avg.pct_change() * 100
+
+     return momentum
+
+
+ def detect_trend_spikes(df: pd.DataFrame, keyword: str, threshold: float = 2.0) -> pd.DataFrame:
+     """
+     Detect significant spikes in trend data
+
+     Args:
+         df: DataFrame from interest_over_time()
+         keyword: Keyword column to analyze
+         threshold: Standard deviations above mean to consider a spike
+
+     Returns:
+         DataFrame with only spike periods
+
+     Example:
+         >>> df = pytrends.interest_over_time()
+         >>> spikes = detect_trend_spikes(df, 'Python', threshold=2.0)
+         >>> print(spikes)
+     """
+     if keyword not in df.columns:
+         raise ValueError(f"Keyword '{keyword}' not found in DataFrame")
+
+     series = df[keyword]
+     mean = series.mean()
+     std = series.std()
+
+     # Find values above threshold
+     threshold_value = mean + (threshold * std)
+     spikes = df[series > threshold_value]
+
+     return spikes
+
+
+ def export_to_multiple_formats(
+     df: pd.DataFrame, base_path: str, formats: list = ["csv", "json", "parquet"]
+ ) -> dict:
+     """
+     Export DataFrame to multiple formats
+
+     Args:
+         df: DataFrame to export
+         base_path: Base path without extension (e.g., "trends")
+         formats: List of formats to export to
+
+     Returns:
+         Dictionary mapping format to file path
+
+     Example:
+         >>> df = pytrends.interest_over_time()
+         >>> paths = export_to_multiple_formats(df, "my_trends")
+         >>> print(paths)
+         {'csv': 'my_trends.csv', 'json': 'my_trends.json', ...}
+     """
+     results = {}
+
+     for fmt in formats:
+         path = f"{base_path}.{fmt}"
+
+         if fmt == "csv":
+             df.to_csv(path)
+         elif fmt == "json":
+             df.to_json(path, orient="records", date_format="iso")
+         elif fmt == "parquet":
+             try:
+                 df.to_parquet(path)
+             except ImportError:
+                 print("Warning: pyarrow not installed, skipping parquet export")
+                 continue
+         elif fmt == "excel" or fmt == "xlsx":
+             try:
+                 df.to_excel(path)
+             except ImportError:
+                 print("Warning: openpyxl not installed, skipping Excel export")
+                 continue
+         else:
+             print(f"Warning: Unknown format '{fmt}', skipping")
+             continue
+
+         results[fmt] = path
+
+     return results
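The helpers in this second file are independent of the RSS client and can be exercised on synthetic data. A minimal sketch (not part of the packaged file), assuming the functions above are in scope (the utility module's path is not shown in this hunk) and pandas is installed; the DataFrame stands in for an interest_over_time() result:

import pandas as pd

# Synthetic stand-in for interest_over_time(): one keyword column on a daily date index
idx = pd.date_range("2023-01-01", periods=60, freq="D")
df = pd.DataFrame({"Python": [40] * 50 + [95] * 10}, index=idx)

timeframe = convert_dates_to_timeframe(idx[0].date(), idx[-1].date())
print(timeframe)                   # 2023-01-01 2023-03-01
print(parse_timeframe(timeframe))  # (start, end) as datetime objects

validate_keywords(["Python"])      # returns True here; raises ValueError on bad input
momentum = calculate_trend_momentum(df, "Python", window=7)
spikes = detect_trend_spikes(df, "Python", threshold=2.0)
print(momentum.tail(3))
print(len(spikes), "spike rows;", format_traffic_number(1250000), "formatted traffic")

paths = export_to_multiple_formats(df, "python_trends", formats=["csv", "json"])
print(paths)  # {'csv': 'python_trends.csv', 'json': 'python_trends.json'}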