pytrends_modern-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytrends_modern/rss.py ADDED
@@ -0,0 +1,337 @@
+ """
+ RSS Feed module for fast real-time Google Trends data
+ """
+
+ import xml.etree.ElementTree as ET
+ from datetime import datetime
+ from typing import Any, Dict, List, Literal, Optional, Union
+
+ import pandas as pd
+ import requests
+
+ from pytrends_modern.config import COUNTRIES, US_STATES
+ from pytrends_modern.exceptions import DownloadError, InvalidParameterError
+
+ # Type aliases
+ OutputFormat = Literal["dict", "json", "csv", "dataframe"]
+
+
+ class TrendsRSS:
+     """
+     Google Trends RSS Feed API
+
+     Fast access to real-time trending searches with rich media.
+
+     Features:
+     - 0.2 second response time
+     - News articles and headlines
+     - Images for each trend
+     - Traffic volume data
+     - Multiple output formats
+
+     Example:
+         >>> rss = TrendsRSS()
+         >>> trends = rss.get_trends(geo='US')
+         >>> for trend in trends:
+         ...     print(f"{trend['title']}: {trend['traffic']}")
+     """
+
+     RSS_URL_TEMPLATE = "https://trends.google.com/trends/trendingsearches/daily/rss?geo={geo}"
+
+     def __init__(self, timeout: int = 10):
+         """
+         Initialize TrendsRSS client
+
+         Args:
+             timeout: Request timeout in seconds
+         """
+         self.timeout = timeout
+
+     def _validate_geo(self, geo: str) -> str:
+         """
+         Validate geographic parameter
+
+         Args:
+             geo: Country or US state code
+
+         Returns:
+             Validated geo code (uppercase)
+
+         Raises:
+             InvalidParameterError: If geo is invalid
+         """
+         geo = geo.upper()
+
+         if geo in COUNTRIES or geo in US_STATES:
+             return geo
+
+         # Suggest similar matches (check for an empty geo before indexing geo[0])
+         all_geos = list(COUNTRIES.keys()) + list(US_STATES.keys())
+         similar = [code for code in all_geos if geo and code.startswith(geo[0])][:5]
+
+         error_msg = f"Invalid geo code '{geo}'."
+         if similar:
+             error_msg += f" Did you mean: {', '.join(similar)}?"
+         error_msg += f"\n\nAvailable: {len(COUNTRIES)} countries, {len(US_STATES)} US states"
+         error_msg += "\nExamples: 'US', 'GB', 'CA', 'US-CA', 'US-NY'"
+
+         raise InvalidParameterError(error_msg)
+
+     def _parse_rss_feed(
+         self,
+         xml_content: str,
+         include_images: bool = True,
+         include_articles: bool = True,
+         max_articles_per_trend: int = 5,
+     ) -> List[Dict[str, Any]]:
+         """
+         Parse RSS XML feed into structured data
+
+         Args:
+             xml_content: Raw XML content
+             include_images: Include trend images
+             include_articles: Include news articles
+             max_articles_per_trend: Maximum articles per trend
+
+         Returns:
+             List of trend dictionaries
+         """
+         try:
+             root = ET.fromstring(xml_content)
+         except ET.ParseError as e:
+             raise DownloadError(f"Failed to parse RSS feed: {str(e)}")
+
+         trends = []
+
+         # Parse each item (trend)
+         for item in root.findall(".//item"):
+             trend_data: Dict[str, Any] = {}
+
+             # Basic info
+             trend_data["title"] = self._get_text(item, "title")
+             trend_data["description"] = self._get_text(item, "description")
+             trend_data["link"] = self._get_text(item, "link")
+             trend_data["pub_date"] = self._get_text(item, "pubDate")
+
+             # Parse pubDate to datetime
+             if trend_data["pub_date"]:
+                 try:
+                     trend_data["pub_date_datetime"] = datetime.strptime(
+                         trend_data["pub_date"], "%a, %d %b %Y %H:%M:%S %z"
+                     )
+                 except ValueError:
+                     trend_data["pub_date_datetime"] = None
+
+             # Traffic volume (from ht:approx_traffic namespace)
+             traffic_elem = item.find(".//{http://www.google.com/trends/hottrends}approx_traffic")
+             if traffic_elem is not None and traffic_elem.text:
+                 # Remove '+' and ',' from traffic string
+                 traffic_str = traffic_elem.text.replace("+", "").replace(",", "")
+                 try:
+                     trend_data["traffic"] = int(traffic_str)
+                 except ValueError:
+                     trend_data["traffic"] = traffic_elem.text
+             else:
+                 trend_data["traffic"] = None
+
+             # Image
+             if include_images:
+                 picture_elem = item.find(".//{http://www.google.com/trends/hottrends}picture")
+                 trend_data["picture"] = picture_elem.text if picture_elem is not None else None
+
+             # News articles
+             if include_articles:
+                 news_items = item.findall(".//{http://www.google.com/trends/hottrends}news_item")
+                 articles = []
+
+                 for news_item in news_items[:max_articles_per_trend]:
+                     article: Dict[str, Any] = {}
+
+                     # Article title
+                     title_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_title"
+                     )
+                     article["title"] = title_elem.text if title_elem is not None else None
+
+                     # Article URL
+                     url_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_url"
+                     )
+                     article["url"] = url_elem.text if url_elem is not None else None
+
+                     # Article snippet
+                     snippet_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_snippet"
+                     )
+                     article["snippet"] = snippet_elem.text if snippet_elem is not None else None
+
+                     # Article source
+                     source_elem = news_item.find(
+                         ".//{http://www.google.com/trends/hottrends}news_item_source"
+                     )
+                     article["source"] = source_elem.text if source_elem is not None else None
+
+                     articles.append(article)
+
+                 trend_data["articles"] = articles
+                 trend_data["article_count"] = len(articles)
+
+             trends.append(trend_data)
+
+         return trends
+
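# Illustrative sketch, not part of rss.py: the element names queried above imply a feed
# shaped roughly like this hand-built sample. The XML below is an assumption for
# demonstration, not a verbatim Google feed.
sample = (
    '<rss><channel><item>'
    '<title>example query</title>'
    '<pubDate>Mon, 01 Jan 2024 00:00:00 -0800</pubDate>'
    '<ht:approx_traffic xmlns:ht="http://www.google.com/trends/hottrends">200,000+</ht:approx_traffic>'
    '<ht:news_item xmlns:ht="http://www.google.com/trends/hottrends">'
    '<ht:news_item_title>Example headline</ht:news_item_title>'
    '<ht:news_item_source>Example source</ht:news_item_source>'
    '</ht:news_item>'
    '</item></channel></rss>'
)
parsed = TrendsRSS()._parse_rss_feed(sample)
print(parsed[0]["title"], parsed[0]["traffic"], parsed[0]["article_count"])  # example query 200000 1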
+     def _get_text(self, element: ET.Element, tag: str) -> Optional[str]:
+         """Safely extract text from XML element"""
+         elem = element.find(tag)
+         return elem.text if elem is not None else None
+
+     def get_trends(
+         self,
+         geo: str = "US",
+         output_format: OutputFormat = "dict",
+         include_images: bool = True,
+         include_articles: bool = True,
+         max_articles_per_trend: int = 5,
+     ) -> Union[List[Dict], str, pd.DataFrame]:
+         """
+         Get trending searches from RSS feed
+
+         Args:
+             geo: Country or US state code (e.g., 'US', 'GB', 'US-CA')
+             output_format: Output format ('dict', 'json', 'csv', 'dataframe')
+             include_images: Include trend images
+             include_articles: Include news articles
+             max_articles_per_trend: Maximum articles per trend
+
+         Returns:
+             Trends data in specified format
+
+         Raises:
+             InvalidParameterError: If parameters are invalid
+             DownloadError: If download fails
+
+         Example:
+             >>> rss = TrendsRSS()
+             >>> trends = rss.get_trends(geo='US', output_format='dataframe')
+             >>> print(trends.head())
+         """
+         # Validate geo
+         geo = self._validate_geo(geo)
+
+         # Build URL
+         url = self.RSS_URL_TEMPLATE.format(geo=geo)
+
+         # Fetch RSS feed
+         try:
+             response = requests.get(url, timeout=self.timeout)
+             response.raise_for_status()
+         except requests.RequestException as e:
+             raise DownloadError(f"Failed to download RSS feed: {str(e)}")
+
+         # Parse feed
+         trends = self._parse_rss_feed(
+             response.text,
+             include_images=include_images,
+             include_articles=include_articles,
+             max_articles_per_trend=max_articles_per_trend,
+         )
+
+         # Format output
+         return self._format_output(trends, output_format)
+
+     def _format_output(
+         self, trends: List[Dict[str, Any]], output_format: OutputFormat
+     ) -> Union[List[Dict], str, pd.DataFrame]:
+         """
+         Format trends data to specified output format
+
+         Args:
+             trends: List of trend dictionaries
+             output_format: Desired output format
+
+         Returns:
+             Formatted data
+         """
+         if output_format == "dict":
+             return trends
+
+         elif output_format == "json":
+             import json
+
+             return json.dumps(trends, indent=2, default=str)
+
+         elif output_format == "dataframe":
+             # Flatten nested articles for DataFrame
+             flattened_trends = []
+             for trend in trends:
+                 flat_trend = {
+                     "title": trend.get("title"),
+                     "description": trend.get("description"),
+                     "link": trend.get("link"),
+                     "pub_date": trend.get("pub_date"),
+                     "traffic": trend.get("traffic"),
+                     "picture": trend.get("picture"),
+                     "article_count": trend.get("article_count", 0),
+                 }
+                 flattened_trends.append(flat_trend)
+
+             return pd.DataFrame(flattened_trends)
+
+         elif output_format == "csv":
+             # Convert to DataFrame then CSV
+             df = self._format_output(trends, "dataframe")
+             return df.to_csv(index=False)
+
+         else:
+             raise InvalidParameterError(
+                 f"Invalid output format '{output_format}'. "
+                 "Must be one of: 'dict', 'json', 'csv', 'dataframe'"
+             )
+
+     def get_available_geos(self) -> Dict[str, str]:
+         """
+         Get dictionary of available geographic locations
+
+         Returns:
+             Dictionary mapping geo codes to location names
+
+         Example:
+             >>> rss = TrendsRSS()
+             >>> geos = rss.get_available_geos()
+             >>> print(f"Available countries: {len([g for g in geos if '-' not in g])}")
+         """
+         return {**COUNTRIES, **US_STATES}
+
+     def get_trends_for_multiple_geos(
+         self, geos: List[str], output_format: OutputFormat = "dict", **kwargs: Any
+     ) -> Dict[str, Union[List[Dict], str, pd.DataFrame]]:
+         """
+         Get trends for multiple geographic locations
+
+         Args:
+             geos: List of geo codes
+             output_format: Output format for each geo
+             **kwargs: Additional arguments passed to get_trends()
+
+         Returns:
+             Dictionary mapping geo codes to their trends
+
+         Example:
+             >>> rss = TrendsRSS()
+             >>> trends = rss.get_trends_for_multiple_geos(
+             ...     geos=['US', 'GB', 'CA'],
+             ...     output_format='dataframe'
+             ... )
+             >>> for geo, df in trends.items():
+             ...     print(f"{geo}: {len(df)} trends")
+         """
+         results = {}
+
+         for geo in geos:
+             try:
+                 results[geo] = self.get_trends(geo=geo, output_format=output_format, **kwargs)
+             except Exception as e:
+                 print(f"[WARN] Failed to get trends for {geo}: {str(e)}")
+                 results[geo] = [] if output_format == "dict" else None
+
+         return results
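Taken together, the class above exposes a small surface: get_trends() for one region, get_trends_for_multiple_geos() for several, and get_available_geos() to discover valid codes. A minimal usage sketch (not part of the packaged file), assuming the wheel is installed and importable as pytrends_modern with network access to trends.google.com:

from pytrends_modern.rss import TrendsRSS
from pytrends_modern.exceptions import DownloadError, InvalidParameterError

try:
    rss = TrendsRSS(timeout=10)

    # List of dicts: title, traffic, pub_date, picture, articles, article_count, ...
    trends = rss.get_trends(geo="US", max_articles_per_trend=3)
    for trend in trends[:5]:
        print(trend["title"], trend["traffic"])

    # Flattened view (articles reduced to article_count) as a pandas DataFrame
    df = rss.get_trends(geo="GB", output_format="dataframe")
    print(df.head())

    # One request per geo; per-geo failures are reported and mapped to an empty result
    by_geo = rss.get_trends_for_multiple_geos(geos=["US", "GB", "CA"])
    print({geo: len(items) for geo, items in by_geo.items()})
except (InvalidParameterError, DownloadError) as exc:
    print(f"Trends request failed: {exc}")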
@@ -0,0 +1,267 @@
+ """
+ Utility functions for pytrends-modern
+ """
+
+ from datetime import date, datetime, timedelta
+ from typing import Optional, Tuple
+
+ import pandas as pd
+
+
+ def convert_dates_to_timeframe(start: date, stop: date) -> str:
+     """
+     Convert two dates to Google Trends timeframe string
+
+     Args:
+         start: Start date
+         stop: End date
+
+     Returns:
+         Timeframe string (e.g., "2023-01-01 2023-12-31")
+
+     Example:
+         >>> from datetime import date
+         >>> timeframe = convert_dates_to_timeframe(
+         ...     date(2023, 1, 1),
+         ...     date(2023, 12, 31)
+         ... )
+         >>> print(timeframe)
+         2023-01-01 2023-12-31
+     """
+     return f"{start.strftime('%Y-%m-%d')} {stop.strftime('%Y-%m-%d')}"
+
+
+ def parse_timeframe(timeframe: str) -> Optional[Tuple[datetime, datetime]]:
+     """
+     Parse a timeframe string to start and end dates
+
+     Args:
+         timeframe: Timeframe string (e.g., "today 12-m", "2023-01-01 2023-12-31")
+
+     Returns:
+         Tuple of (start_datetime, end_datetime) or None if relative timeframe
+
+     Example:
+         >>> dates = parse_timeframe("2023-01-01 2023-12-31")
+         >>> print(dates)
+         (datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 12, 31, 0, 0))
+     """
+     # Check if it's a date range
+     if " " in timeframe and not timeframe.startswith(("now", "today")):
+         parts = timeframe.split()
+         if len(parts) == 2:
+             try:
+                 start = datetime.strptime(parts[0], "%Y-%m-%d")
+                 end = datetime.strptime(parts[1], "%Y-%m-%d")
+                 return (start, end)
+             except ValueError:
+                 pass
+
+     return None
+
+
+ def validate_keywords(keywords: list) -> bool:
+     """
+     Validate keyword list
+
+     Args:
+         keywords: List of keywords
+
+     Returns:
+         True if valid
+
+     Raises:
+         ValueError: If keywords are invalid
+     """
+     if not keywords:
+         raise ValueError("At least one keyword is required")
+
+     if len(keywords) > 5:
+         raise ValueError("Maximum 5 keywords allowed")
+
+     for kw in keywords:
+         if not isinstance(kw, str):
+             raise ValueError(f"Keywords must be strings, got {type(kw)}")
+         if not kw.strip():
+             raise ValueError("Keywords cannot be empty")
+
+     return True
+
+
+ def normalize_geo_code(geo: str) -> str:
+     """
+     Normalize geographic code to uppercase
+
+     Args:
+         geo: Geographic code
+
+     Returns:
+         Uppercase geo code
+
+     Example:
+         >>> normalize_geo_code('us')
+         'US'
+         >>> normalize_geo_code('US-ca')
+         'US-CA'
+     """
+     return geo.upper()
+
+
+ def format_traffic_number(traffic: int) -> str:
+     """
+     Format traffic number with comma separators
+
+     Args:
+         traffic: Traffic count
+
+     Returns:
+         Formatted string (e.g., "1,000,000+")
+
+     Example:
+         >>> format_traffic_number(1000000)
+         '1,000,000+'
+     """
+     if traffic >= 1000000:
+         return f"{traffic:,}+"
+     elif traffic >= 1000:
+         return f"{traffic:,}"
+     else:
+         return str(traffic)
+
+
+ def merge_trends_data(dfs: list, how: str = "outer") -> pd.DataFrame:
+     """
+     Merge multiple trends DataFrames
+
+     Args:
+         dfs: List of DataFrames to merge
+         how: Merge method ('outer', 'inner', 'left', 'right')
+
+     Returns:
+         Merged DataFrame
+
+     Example:
+         >>> df1 = pytrends1.interest_over_time()
+         >>> df2 = pytrends2.interest_over_time()
+         >>> merged = merge_trends_data([df1, df2])
+     """
+     if not dfs:
+         return pd.DataFrame()
+
+     result = dfs[0]
+     for df in dfs[1:]:
+         result = pd.merge(
+             result, df, left_index=True, right_index=True, how=how, suffixes=("", "_dup")
+         )
+
+     return result
+
+
+ def calculate_trend_momentum(df: pd.DataFrame, keyword: str, window: int = 7) -> pd.Series:
+     """
+     Calculate momentum (rate of change) for a keyword's trend
+
+     Args:
+         df: DataFrame from interest_over_time()
+         keyword: Keyword column to analyze
+         window: Window size for rolling average
+
+     Returns:
+         Series with momentum values
+
+     Example:
+         >>> df = pytrends.interest_over_time()
+         >>> momentum = calculate_trend_momentum(df, 'Python', window=7)
+         >>> print(momentum.tail())
+     """
+     if keyword not in df.columns:
+         raise ValueError(f"Keyword '{keyword}' not found in DataFrame")
+
+     # Calculate rolling average
+     rolling_avg = df[keyword].rolling(window=window).mean()
+
+     # Calculate momentum (percent change)
+     momentum = rolling_avg.pct_change() * 100
+
+     return momentum
+
+
+ def detect_trend_spikes(df: pd.DataFrame, keyword: str, threshold: float = 2.0) -> pd.DataFrame:
+     """
+     Detect significant spikes in trend data
+
+     Args:
+         df: DataFrame from interest_over_time()
+         keyword: Keyword column to analyze
+         threshold: Standard deviations above mean to consider a spike
+
+     Returns:
+         DataFrame with only spike periods
+
+     Example:
+         >>> df = pytrends.interest_over_time()
+         >>> spikes = detect_trend_spikes(df, 'Python', threshold=2.0)
+         >>> print(spikes)
+     """
+     if keyword not in df.columns:
+         raise ValueError(f"Keyword '{keyword}' not found in DataFrame")
+
+     series = df[keyword]
+     mean = series.mean()
+     std = series.std()
+
+     # Find values above threshold
+     threshold_value = mean + (threshold * std)
+     spikes = df[series > threshold_value]
+
+     return spikes
+
+
+ def export_to_multiple_formats(
+     df: pd.DataFrame, base_path: str, formats: list = ["csv", "json", "parquet"]
+ ) -> dict:
+     """
+     Export DataFrame to multiple formats
+
+     Args:
+         df: DataFrame to export
+         base_path: Base path without extension (e.g., "trends")
+         formats: List of formats to export to
+
+     Returns:
+         Dictionary mapping format to file path
+
+     Example:
+         >>> df = pytrends.interest_over_time()
+         >>> paths = export_to_multiple_formats(df, "my_trends")
+         >>> print(paths)
+         {'csv': 'my_trends.csv', 'json': 'my_trends.json', ...}
+     """
+     results = {}
+
+     for fmt in formats:
+         path = f"{base_path}.{fmt}"
+
+         if fmt == "csv":
+             df.to_csv(path)
+         elif fmt == "json":
+             df.to_json(path, orient="records", date_format="iso")
+         elif fmt == "parquet":
+             try:
+                 df.to_parquet(path)
+             except ImportError:
+                 print("Warning: pyarrow not installed, skipping parquet export")
+                 continue
+         elif fmt == "excel" or fmt == "xlsx":
+             try:
+                 df.to_excel(path)
+             except ImportError:
+                 print("Warning: openpyxl not installed, skipping Excel export")
+                 continue
+         else:
+             print(f"Warning: Unknown format '{fmt}', skipping")
+             continue
+
+         results[fmt] = path
+
+     return results
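The helpers in this second file are independent of the RSS client and can be exercised on synthetic data. A minimal sketch (not part of the packaged file), assuming the functions above are in scope (the utility module's path is not shown in this hunk) and pandas is installed; the DataFrame stands in for an interest_over_time() result:

import pandas as pd

# Synthetic stand-in for interest_over_time(): one keyword column on a daily date index
idx = pd.date_range("2023-01-01", periods=60, freq="D")
df = pd.DataFrame({"Python": [40] * 50 + [95] * 10}, index=idx)

timeframe = convert_dates_to_timeframe(idx[0].date(), idx[-1].date())
print(timeframe)                   # 2023-01-01 2023-03-01
print(parse_timeframe(timeframe))  # (start, end) as datetime objects

validate_keywords(["Python"])      # returns True here; raises ValueError on bad input
momentum = calculate_trend_momentum(df, "Python", window=7)
spikes = detect_trend_spikes(df, "Python", threshold=2.0)
print(momentum.tail(3))
print(len(spikes), "spike rows;", format_traffic_number(1250000), "formatted traffic")

paths = export_to_multiple_formats(df, "python_trends", formats=["csv", "json"])
print(paths)  # {'csv': 'python_trends.csv', 'json': 'python_trends.json'}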