pytrends-modern 0.1.2__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytrends_modern/__init__.py +5 -1
- pytrends_modern/browser_config.py +94 -0
- pytrends_modern/browser_config_camoufox.py +67 -0
- pytrends_modern/camoufox_setup.py +300 -0
- pytrends_modern/proxy_extension.py +78 -0
- pytrends_modern/request.py +454 -4
- {pytrends_modern-0.1.2.dist-info → pytrends_modern-0.2.0.dist-info}/METADATA +73 -2
- pytrends_modern-0.2.0.dist-info/RECORD +19 -0
- pytrends_modern-0.1.2.dist-info/RECORD +0 -15
- {pytrends_modern-0.1.2.dist-info → pytrends_modern-0.2.0.dist-info}/WHEEL +0 -0
- {pytrends_modern-0.1.2.dist-info → pytrends_modern-0.2.0.dist-info}/entry_points.txt +0 -0
- {pytrends_modern-0.1.2.dist-info → pytrends_modern-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {pytrends_modern-0.1.2.dist-info → pytrends_modern-0.2.0.dist-info}/top_level.txt +0 -0
pytrends_modern/__init__.py
CHANGED
|
@@ -2,13 +2,15 @@
|
|
|
2
2
|
pytrends-modern: Modern Google Trends API
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
__version__ = "
|
|
5
|
+
__version__ = "0.2.0"
|
|
6
6
|
__author__ = "pytrends-modern contributors"
|
|
7
7
|
__license__ = "MIT"
|
|
8
8
|
|
|
9
9
|
from pytrends_modern.request import TrendReq
|
|
10
10
|
from pytrends_modern.rss import TrendsRSS
|
|
11
11
|
from pytrends_modern.scraper import TrendsScraper
|
|
12
|
+
from pytrends_modern.browser_config_camoufox import BrowserConfig
|
|
13
|
+
from pytrends_modern import camoufox_setup
|
|
12
14
|
from pytrends_modern.exceptions import (
|
|
13
15
|
TooManyRequestsError,
|
|
14
16
|
ResponseError,
|
|
@@ -21,6 +23,8 @@ __all__ = [
|
|
|
21
23
|
"TrendReq",
|
|
22
24
|
"TrendsRSS",
|
|
23
25
|
"TrendsScraper",
|
|
26
|
+
"BrowserConfig",
|
|
27
|
+
"camoufox_setup",
|
|
24
28
|
"TooManyRequestsError",
|
|
25
29
|
"ResponseError",
|
|
26
30
|
"InvalidParameterError",
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Browser configuration for DrissionPage automation."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BrowserConfig:
    """Plain settings holder for DrissionPage-based browser automation.

    The class only stores the values it is given; it performs no browser
    work itself.

    NOTE(review): the package ``__init__`` exports the Camoufox
    ``BrowserConfig`` (from ``browser_config_camoufox``), not this class --
    confirm whether ``TrendReq`` still accepts this DrissionPage variant.

    Documented limitations of browser mode:
        - Only 1 keyword supported (no comparison)
        - Only 'today 1-m' timeframe supported
        - Only WORLDWIDE geo supported (no geo filtering)
        - Requires Chrome/Chromium browser installed

    Args:
        browser_path: Path to the Chrome/Chromium executable. When omitted
            (or empty), the first existing entry of a built-in list of
            common install locations is used, falling back to
            '/usr/bin/chromium'.
        port: Browser remote debugging port (default: 9222).
        headless: Run the browser in headless mode (default: True).
        proxy: Proxy server URL (e.g., 'http://proxy.com:8080').
        proxy_username: Proxy username (for authenticated proxies).
        proxy_password: Proxy password (for authenticated proxies).
        user_data_dir: Browser profile directory. Stored exactly as given
            (no ``~`` expansion happens here).

    Example:
        >>> from pytrends_modern.browser_config import BrowserConfig
        >>> config = BrowserConfig(
        ...     browser_path='/usr/bin/chromium',
        ...     proxy='153.80.44.3:64804',
        ...     proxy_username='user',
        ...     proxy_password='pass',
        ...     user_data_dir='~/.config/chromium-pytrends'
        ... )
    """

    def __init__(
        self,
        browser_path: Optional[str] = None,
        port: int = 9222,
        headless: bool = True,
        proxy: Optional[str] = None,
        proxy_username: Optional[str] = None,
        proxy_password: Optional[str] = None,
        user_data_dir: Optional[str] = None
    ):
        # A missing or empty path triggers auto-detection.
        if not browser_path:
            browser_path = self._get_default_browser_path()

        self.browser_path = browser_path
        self.port = port
        self.headless = headless
        self.proxy = proxy
        self.proxy_username = proxy_username
        self.proxy_password = proxy_password
        self.user_data_dir = user_data_dir

    @staticmethod
    def _get_default_browser_path() -> str:
        """Return the first existing well-known browser path, else '/usr/bin/chromium'."""
        import os

        # Probed in order; the first hit wins.
        candidates = (
            '/usr/bin/chromium',
            '/usr/bin/chromium-browser',
            '/usr/bin/chrome',
            '/usr/bin/google-chrome',
            '/snap/bin/chromium',
            'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
            'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
        )
        found = (candidate for candidate in candidates if os.path.exists(candidate))
        return next(found, '/usr/bin/chromium')

    def __repr__(self) -> str:
        # Proxy credentials and the profile directory are intentionally not shown.
        fields = (
            f"browser_path='{self.browser_path}'",
            f"port={self.port}",
            f"headless={self.headless}",
            f"proxy={self.proxy}",
        )
        return "BrowserConfig(" + ", ".join(fields) + ")"
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Browser configuration for Camoufox automation"""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
import os as os_module
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BrowserConfig:
    """Configuration for Camoufox browser automation.

    Holds the options that ``TrendReq`` reads when it starts its Camoufox
    browser (headless flag, proxy settings, profile directory, fingerprint
    options). The class itself performs no browser work.

    Documented limitations of browser mode:
        - Only 1 keyword supported (no comparison)
        - Only 'today 1-m' timeframe supported
        - Only WORLDWIDE geo supported (no geo filtering)
        - Requires Google account login (first run)

    Args:
        headless: Run browser in headless mode (default: False).
            The original documentation also allows the string 'virtual'
            on Linux to use Xvfb; the value is passed through unchanged.
        proxy_server: Proxy server URL (e.g., 'http://proxy.com:8080')
        proxy_username: Proxy username (for authenticated proxies)
        proxy_password: Proxy password (for authenticated proxies)
        user_data_dir: Browser profile directory to persist login session.
            A leading ``~`` is expanded.
            Default: ~/.config/camoufox-pytrends-profile
        humanize: Enable human-like cursor movement (default: True)
        os: Operating system for fingerprint ('windows', 'macos', 'linux')
        geoip: Auto-detect geolocation from proxy IP. Forced to False
            when no ``proxy_server`` is given.

    Example:
        >>> from pytrends_modern import TrendReq, BrowserConfig
        >>> config = BrowserConfig()
        >>> pytrends = TrendReq(browser_config=config)
        >>> pytrends.build_payload(['Python'])
        >>> df = pytrends.interest_over_time()
        >>>
        >>> # With proxy
        >>> config = BrowserConfig(
        ...     proxy_server='http://proxy.com:8080',
        ...     proxy_username='user',
        ...     proxy_password='pass',
        ...     geoip=True
        ... )
    """

    def __init__(
        self,
        headless: bool = False,
        proxy_server: Optional[str] = None,
        proxy_username: Optional[str] = None,
        proxy_password: Optional[str] = None,
        user_data_dir: Optional[str] = None,
        humanize: bool = True,
        os: str = 'linux',
        geoip: bool = True,
    ):
        self.headless = headless
        self.proxy_server = proxy_server
        self.proxy_username = proxy_username
        self.proxy_password = proxy_password
        # Expand "~" for user-supplied paths as well as for the default, so
        # the stored value is always directly usable as a filesystem path.
        # (The ``os`` parameter shadows the module, hence the ``os_module`` alias.)
        self.user_data_dir = os_module.path.expanduser(
            user_data_dir or '~/.config/camoufox-pytrends-profile'
        )
        self.humanize = humanize
        self.os = os
        # GeoIP lookup is only meaningful relative to a proxy's exit IP.
        self.geoip = geoip if proxy_server else False

    def __repr__(self) -> str:
        # Proxy credentials are deliberately left out so they cannot leak
        # into logs or tracebacks.
        return (
            f"BrowserConfig(headless={self.headless!r}, "
            f"proxy_server={self.proxy_server!r}, "
            f"user_data_dir={self.user_data_dir!r}, "
            f"humanize={self.humanize!r}, os={self.os!r}, geoip={self.geoip!r})"
        )
|
|
67
|
+
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Camoufox setup and configuration helper for pytrends-modern
|
|
3
|
+
|
|
4
|
+
This module helps users set up their Google account login for browser mode.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_default_profile_dir() -> str:
    """Return the default Camoufox profile directory with ``~`` expanded."""
    default_location = '~/.config/camoufox-pytrends-profile'
    return os.path.expanduser(default_location)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def is_profile_configured(profile_dir: Optional[str] = None) -> bool:
    """
    Report whether a Camoufox profile directory looks like it has been used.

    This is a file-existence heuristic only: it does not verify that a
    Google login is actually stored in the profile.

    Args:
        profile_dir: Profile directory to inspect (``~`` is expanded), or
            None to inspect the default profile directory.

    Returns:
        True if the directory exists and contains at least one of
        'prefs.js', 'cookies.sqlite' or 'storage'; False otherwise.
    """
    resolved_dir = (
        get_default_profile_dir()
        if profile_dir is None
        else os.path.expanduser(profile_dir)
    )

    if not os.path.exists(resolved_dir):
        return False

    # Entries a Firefox-style profile creates once the browser has run.
    markers = ('prefs.js', 'cookies.sqlite', 'storage')
    return any(
        os.path.exists(os.path.join(resolved_dir, marker))
        for marker in markers
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def setup_profile(profile_dir: Optional[str] = None, headless: bool = False) -> bool:
    """
    Interactively create a Camoufox profile by letting the user log in to Google.

    Prints instructions, waits for Enter, opens a persistent-context
    Camoufox browser on Google Trends, waits for Enter again, and then
    checks whether the profile directory looks configured.

    Args:
        profile_dir: Custom profile directory (``~`` is expanded), or None
            for the default profile directory.
        headless: Passed to Camoufox as its ``headless`` option.

    Returns:
        True if the profile looks configured afterwards. False if the user
        declined to reconfigure an existing profile, if the profile does not
        look configured after the session, or if any error occurred while
        the browser was in use (the error is printed, not raised).

    Raises:
        ImportError: If Camoufox is not installed
    """
    try:
        from camoufox.sync_api import Camoufox
    except ImportError:
        raise ImportError(
            "Camoufox is required for browser mode. "
            "Install with: pip install pytrends-modern[browser]"
        )

    target_dir = (
        get_default_profile_dir()
        if profile_dir is None
        else os.path.expanduser(profile_dir)
    )

    rule = "=" * 70
    for banner_line in (
        rule,
        "🔧 Camoufox Profile Setup for pytrends-modern",
        rule,
        f"\n📁 Profile directory: {target_dir}",
    ):
        print(banner_line)

    # An existing profile is only overwritten after explicit confirmation.
    if is_profile_configured(target_dir):
        print("✓ Profile already exists")
        answer = input("\nReconfigure profile? This will open the browser again (y/N): ")
        if answer.lower() != 'y':
            print("Setup cancelled.")
            return False

    for instruction in (
        "\n📖 Instructions:",
        "1. Browser will open to Google Trends",
        "2. Log in to your Google account",
        "3. Once logged in and page loads, press Enter here",
        "4. Your login will be saved for future use",
        "\n⚠️ IMPORTANT: Browser mode has limitations:",
        " - Only 1 keyword at a time (no comparisons)",
        " - Only 'today 1-m' timeframe",
        " - Only WORLDWIDE region",
    ):
        print(instruction)
    print()

    input("Press Enter to open browser...")

    try:
        with Camoufox(
            persistent_context=True,
            user_data_dir=target_dir,
            headless=headless,
            humanize=True,
            os='linux',
            geoip=True,
        ) as context:
            # Reuse the tab the persistent context already opened, if any.
            if context.pages:
                page = context.pages[0]
            else:
                page = context.new_page()

            print("\n🌐 Opening Google Trends...")
            print(" Please log in to your Google account")

            page.goto(
                "https://trends.google.com/trends/explore?q=Python&hl=en-GB",
                wait_until='networkidle',
                timeout=60000
            )

            title = page.title()
            looks_blocked = "429" in title or "error" in title.lower()
            if looks_blocked:
                print(f"\n⚠️ Page title: {title}")
                print(" You may need to log in or solve a CAPTCHA")
            else:
                print(f"✓ Page loaded: {title}")

            for step in (
                "\n📋 Please:",
                " 1. Log in to Google if not already logged in",
                " 2. Make sure the page loads correctly",
                " 3. Then come back here and press Enter",
            ):
                print(step)

            input("\nPress Enter when done (browser will close)...")

            # The check runs while the browser context is still open.
            if not is_profile_configured(target_dir):
                print("\n⚠️ Warning: Profile directory exists but may not be fully configured")
                print(" Try running setup again or check if browser saved data")
                return False

            for success_line in (
                "\n✅ SUCCESS! Profile configured successfully",
                f"📁 Profile saved to: {target_dir}",
                "\n💡 You can now use pytrends-modern with browser mode:",
                " from pytrends_modern import TrendReq, BrowserConfig",
                " config = BrowserConfig()",
                " pytrends = TrendReq(browser_config=config)",
            ):
                print(success_line)
            return True

    except Exception as e:
        print(f"\n❌ Error during setup: {e}")
        return False
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def export_profile(source_dir: Optional[str] = None, dest_path: str = "./camoufox-profile.tar.gz") -> bool:
    """
    Export a profile directory to a gzip-compressed tar archive.

    The archive contains a single top-level directory named after the last
    component of ``source_dir``.

    Args:
        source_dir: Source profile directory (``~`` is expanded), or None
            for the default profile directory.
        dest_path: Destination file path for the archive (``~`` is expanded).

    Returns:
        True if the archive was written. False if the source does not look
        like a configured profile or if writing failed (the error is
        printed, not raised).
    """
    import tarfile

    if source_dir is None:
        source_dir = get_default_profile_dir()
    else:
        source_dir = os.path.expanduser(source_dir)

    # normpath drops a trailing separator; without it basename() would be ''
    # and the profile files would be stored at the archive root instead of
    # inside a named top-level directory.
    source_dir = os.path.normpath(source_dir)
    dest_path = os.path.expanduser(dest_path)

    if not is_profile_configured(source_dir):
        print(f"❌ Profile not configured at: {source_dir}")
        return False

    try:
        print(f"📦 Exporting profile from: {source_dir}")
        print(f"📁 To: {dest_path}")

        with tarfile.open(dest_path, "w:gz") as tar:
            tar.add(source_dir, arcname=os.path.basename(source_dir))

        print("✅ Profile exported successfully!")
        print(f"📊 File size: {os.path.getsize(dest_path) / 1024 / 1024:.2f} MB")
        return True

    except Exception as e:
        # CLI-facing boundary: report the failure and signal it via the
        # return value rather than propagating.
        print(f"❌ Export failed: {e}")
        return False
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def import_profile(source_path: str, dest_dir: Optional[str] = None) -> bool:
    """
    Import a profile from a gzip-compressed tar archive.

    The archive is unpacked into the *parent* of ``dest_dir``, so its
    top-level directory must have the same name as the last component of
    ``dest_dir`` for the import to be reported as successful.

    Archive members that would be written outside the extraction directory
    are rejected and nothing is extracted.

    Args:
        source_path: Source tar.gz file path
        dest_dir: Destination profile directory (``~`` is expanded), or
            None for the default profile directory.

    Returns:
        True if, after extraction, ``dest_dir`` looks like a configured
        profile. False if the archive is missing, unsafe, unreadable, or
        the result does not look configured (errors are printed, not raised).
    """
    import tarfile

    if dest_dir is None:
        dest_dir = get_default_profile_dir()
    else:
        dest_dir = os.path.expanduser(dest_dir)

    if not os.path.exists(source_path):
        print(f"❌ Source file not found: {source_path}")
        return False

    # abspath() first: dirname() of a bare relative name is '' (which
    # makedirs rejects) and dirname() of a path with a trailing separator
    # is the path itself rather than its parent.
    parent_dir = os.path.dirname(os.path.abspath(dest_dir))

    try:
        print(f"📦 Importing profile from: {source_path}")
        print(f"📁 To: {dest_dir}")

        # Create parent directory if needed
        os.makedirs(parent_dir, exist_ok=True)

        with tarfile.open(source_path, "r:gz") as tar:
            _extract_profile_archive(tar, parent_dir)

        if is_profile_configured(dest_dir):
            print("✅ Profile imported successfully!")
            return True
        else:
            print("⚠️ Profile imported but may not be fully configured")
            return False

    except Exception as e:
        # CLI-facing boundary: report the failure and signal it via the
        # return value rather than propagating.
        print(f"❌ Import failed: {e}")
        return False


def _extract_profile_archive(tar, target_dir: str) -> None:
    """Extract *tar* into *target_dir*, refusing members that would land outside it."""
    import tarfile

    root = os.path.realpath(target_dir)

    # Validate every member before writing anything, so an unsafe archive
    # leaves the filesystem untouched.
    for member in tar.getmembers():
        landing = os.path.realpath(os.path.join(root, member.name))
        try:
            inside = os.path.commonpath([root, landing]) == root
        except ValueError:
            # e.g. different drives on Windows
            inside = False
        if not inside:
            raise tarfile.TarError(
                f"Refusing to extract unsafe archive member: {member.name}"
            )

    # Use the stdlib 'data' extraction filter where the running Python
    # provides it; it additionally guards against link-based escapes.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=root, filter="data")
    else:
        tar.extractall(path=root)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def print_profile_status(profile_dir: Optional[str] = None):
    """Print whether the given (or default) profile directory looks configured.

    Args:
        profile_dir: Profile directory to report on (``~`` is expanded), or
            None for the default profile directory.
    """
    location = (
        get_default_profile_dir()
        if profile_dir is None
        else os.path.expanduser(profile_dir)
    )

    rule = "=" * 70
    print(rule)
    print("🔍 Camoufox Profile Status")
    print(rule)
    print(f"\n📁 Profile directory: {location}")

    if is_profile_configured(location):
        report = (
            "✅ Status: Configured",
            "\n💡 Profile is ready to use with browser mode",
            "\n📦 To use in Docker/other machines:",
            " 1. Export: from pytrends_modern.camoufox_setup import export_profile",
            " export_profile(dest_path='profile.tar.gz')",
            " 2. Copy profile.tar.gz to target machine/container",
            " 3. Import: from pytrends_modern.camoufox_setup import import_profile",
            " import_profile('profile.tar.gz')",
        )
    else:
        report = (
            "❌ Status: Not configured",
            "\n⚠️ You need to run setup before using browser mode:",
            " from pytrends_modern.camoufox_setup import setup_profile",
            " setup_profile()",
            "\n Or use the CLI:",
            " python -m pytrends_modern.camoufox_setup",
        )

    for line in report:
        print(line)

    print(rule)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
if __name__ == "__main__":
    # Command-line entry point: with no argument run the interactive setup;
    # otherwise dispatch on the first argument (status / export / import).
    import sys

    cli_args = sys.argv[1:]

    if not cli_args:
        sys.exit(0 if setup_profile() else 1)

    action = cli_args[0]

    if action == "status":
        # Status only prints; the process then ends normally.
        print_profile_status()
    elif action == "export":
        target = cli_args[1] if len(cli_args) > 1 else "./camoufox-profile.tar.gz"
        exported = export_profile(dest_path=target)
        sys.exit(0 if exported else 1)
    elif action == "import":
        if len(cli_args) < 2:
            print("❌ Usage: python -m pytrends_modern.camoufox_setup import <source.tar.gz>")
            sys.exit(1)
        imported = import_profile(cli_args[1])
        sys.exit(0 if imported else 1)
    else:
        print(f"❌ Unknown command: {action}")
        for usage_line in (
            "\nUsage:",
            " python -m pytrends_modern.camoufox_setup # Run setup",
            " python -m pytrends_modern.camoufox_setup status # Check status",
            " python -m pytrends_modern.camoufox_setup export [path] # Export profile",
            " python -m pytrends_modern.camoufox_setup import <path> # Import profile",
        ):
            print(usage_line)
        sys.exit(1)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Proxy extension generator for Chrome/Chromium with automatic authentication
|
|
3
|
+
Creates a simple extension that handles proxy auth without any UI
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import tempfile
|
|
8
|
+
import json
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def create_proxy_extension(username: str, password: str, host: str, port: int) -> str:
    """
    Create an unpacked Chrome extension that answers proxy auth challenges.

    Writes a Manifest V3 ``manifest.json`` and a ``background.js`` whose
    ``onAuthRequired`` listener returns the given credentials, into a fresh
    temporary directory. The directory is not removed by this function;
    the caller owns its cleanup.

    The credentials are embedded as JSON-encoded string literals, so quotes,
    backslashes and non-ASCII characters in them cannot break or inject
    into the generated script.

    Args:
        username: Proxy username
        password: Proxy password
        host: Proxy host/IP (only used in the printed summary)
        port: Proxy port (only used in the printed summary)

    Returns:
        Path to the extension folder (DrissionPage needs folder not ZIP)
    """
    # Create temp directory for extension
    extension_dir = tempfile.mkdtemp(prefix='proxy_auth_')

    manifest = {
        "manifest_version": 3,
        "name": "Auto Proxy Auth",
        "version": "1.0",
        "description": "Automatic proxy authentication",
        "permissions": [
            "webRequest",
            "webRequestAuthProvider"
        ],
        "host_permissions": [
            "<all_urls>"
        ],
        "background": {
            "service_worker": "background.js"
        }
    }

    # json.dumps yields a double-quoted, fully escaped literal that is also
    # a valid JavaScript string literal.
    js_username = json.dumps(username)
    js_password = json.dumps(password)

    # Background script - handles auth automatically
    background_js = f"""
// Automatic proxy authentication - no UI
chrome.webRequest.onAuthRequired.addListener(
    function(details) {{
        console.log('[Proxy Auth] Providing credentials for:', details.url);
        return {{
            authCredentials: {{
                username: {js_username},
                password: {js_password}
            }}
        }};
    }},
    {{urls: ["<all_urls>"]}},
    ["blocking"]
);

console.log('[Proxy Auth] Extension loaded - auth will be automatic');
"""

    # Explicit UTF-8 so the files do not depend on the platform's locale encoding.
    with open(os.path.join(extension_dir, 'manifest.json'), 'w', encoding='utf-8') as f:
        json.dump(manifest, f, indent=2)

    with open(os.path.join(extension_dir, 'background.js'), 'w', encoding='utf-8') as f:
        f.write(background_js)

    # The password is deliberately never printed.
    print(f"[Proxy Extension] Created at: {extension_dir}")
    print(f"[Proxy Extension] Username: {username}, Host: {host}:{port}")

    return extension_dir
|
pytrends_modern/request.py
CHANGED
|
@@ -15,6 +15,7 @@ from requests.adapters import HTTPAdapter
|
|
|
15
15
|
from requests.packages.urllib3.util.retry import Retry
|
|
16
16
|
|
|
17
17
|
from pytrends_modern import exceptions
|
|
18
|
+
from pytrends_modern.browser_config_camoufox import BrowserConfig
|
|
18
19
|
from pytrends_modern.config import (
|
|
19
20
|
BASE_TRENDS_URL,
|
|
20
21
|
CATEGORIES_URL,
|
|
@@ -72,6 +73,7 @@ class TrendReq:
|
|
|
72
73
|
backoff_factor: float = DEFAULT_BACKOFF_FACTOR,
|
|
73
74
|
requests_args: Optional[Dict[str, Any]] = None,
|
|
74
75
|
rotate_user_agent: bool = True,
|
|
76
|
+
browser_config: Optional[BrowserConfig] = None,
|
|
75
77
|
):
|
|
76
78
|
"""
|
|
77
79
|
Initialize Google Trends API client
|
|
@@ -86,7 +88,36 @@ class TrendReq:
|
|
|
86
88
|
backoff_factor: Backoff factor for exponential backoff
|
|
87
89
|
requests_args: Additional arguments to pass to requests
|
|
88
90
|
rotate_user_agent: Whether to rotate user agents
|
|
91
|
+
browser_config: DrissionPage browser configuration (experimental)
|
|
92
|
+
|
|
93
|
+
⚠️ LIMITATIONS when using browser_config:
|
|
94
|
+
- Only 1 keyword supported (no comparison)
|
|
95
|
+
- Only 'today 1-m' timeframe supported
|
|
96
|
+
- Only WORLDWIDE geo supported (no geo filtering)
|
|
97
|
+
- Requires Chrome/Chromium browser installed
|
|
89
98
|
"""
|
|
99
|
+
# Browser mode initialization
|
|
100
|
+
self.browser_config = browser_config
|
|
101
|
+
self.browser = None
|
|
102
|
+
self.browser_context = None
|
|
103
|
+
self.browser_page = None
|
|
104
|
+
self.browser_mode = browser_config is not None
|
|
105
|
+
self.browser_responses_cache = {} # Cache for captured API responses
|
|
106
|
+
|
|
107
|
+
if self.browser_mode:
|
|
108
|
+
import warnings
|
|
109
|
+
warnings.warn(
|
|
110
|
+
"⚠️ Camoufox browser mode is EXPERIMENTAL and has limitations:\n"
|
|
111
|
+
" - Only 1 keyword supported (no keyword comparison)\n"
|
|
112
|
+
" - Only 'today 1-m' timeframe supported\n"
|
|
113
|
+
" - Only WORLDWIDE geo supported\n"
|
|
114
|
+
" - Requires Google account login (first run)\n"
|
|
115
|
+
" - Login session is saved for future runs",
|
|
116
|
+
UserWarning,
|
|
117
|
+
stacklevel=2
|
|
118
|
+
)
|
|
119
|
+
self._init_camoufox()
|
|
120
|
+
|
|
90
121
|
# Rate limit message from Google
|
|
91
122
|
self.google_rl = "You have reached your quota limit. Please try again later."
|
|
92
123
|
|
|
@@ -112,8 +143,11 @@ class TrendReq:
|
|
|
112
143
|
# Store dict format in requests_args
|
|
113
144
|
self.requests_args["proxies"] = proxies
|
|
114
145
|
|
|
115
|
-
# Get initial cookies
|
|
116
|
-
|
|
146
|
+
# Get initial cookies (skip in browser mode)
|
|
147
|
+
if not self.browser_mode:
|
|
148
|
+
self.cookies = self._get_google_cookie()
|
|
149
|
+
else:
|
|
150
|
+
self.cookies = {}
|
|
117
151
|
|
|
118
152
|
# Initialize widget payloads
|
|
119
153
|
self.token_payload: Dict[str, Any] = {}
|
|
@@ -127,10 +161,330 @@ class TrendReq:
|
|
|
127
161
|
if self.rotate_user_agent:
|
|
128
162
|
self.headers["User-Agent"] = random.choice(USER_AGENTS)
|
|
129
163
|
self.headers.update(self.requests_args.pop("headers", {}))
|
|
164
|
+
|
|
165
|
+
def __del__(self):
|
|
166
|
+
"""Cleanup browser on object deletion"""
|
|
167
|
+
self._close_browser()
|
|
130
168
|
|
|
131
169
|
def _get_user_agent(self) -> str:
    """Return a User-Agent string: a random entry when rotation is enabled, else the first."""
    if self.rotate_user_agent:
        return random.choice(USER_AGENTS)
    return USER_AGENTS[0]
|
|
172
|
+
|
|
173
|
+
def _init_camoufox(self) -> None:
|
|
174
|
+
"""Initialize Camoufox browser with persistent context"""
|
|
175
|
+
try:
|
|
176
|
+
from camoufox.sync_api import Camoufox
|
|
177
|
+
except ImportError:
|
|
178
|
+
raise ImportError(
|
|
179
|
+
"Camoufox is required for browser mode. "
|
|
180
|
+
"Install with: pip install pytrends-modern[browser]"
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
# Prepare browser options
|
|
184
|
+
import os
|
|
185
|
+
user_data_dir = os.path.expanduser(
|
|
186
|
+
self.browser_config.user_data_dir or "~/.config/camoufox-pytrends-profile"
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Check if profile is configured (has Google login)
|
|
190
|
+
from pytrends_modern.camoufox_setup import is_profile_configured
|
|
191
|
+
if not is_profile_configured(user_data_dir):
|
|
192
|
+
raise exceptions.BrowserError(
|
|
193
|
+
f"Camoufox profile not configured at: {user_data_dir}\n"
|
|
194
|
+
"You must set up your Google account login first:\n\n"
|
|
195
|
+
" from pytrends_modern.camoufox_setup import setup_profile\n"
|
|
196
|
+
" setup_profile()\n\n"
|
|
197
|
+
"Or run from command line:\n"
|
|
198
|
+
" python -m pytrends_modern.camoufox_setup\n\n"
|
|
199
|
+
"This will open a browser for you to log in to Google."
|
|
200
|
+
)
|
|
201
|
+
|
|
202
|
+
# Proxy configuration (if provided)
|
|
203
|
+
proxy_config = None
|
|
204
|
+
if self.browser_config.proxy_server:
|
|
205
|
+
proxy_config = {
|
|
206
|
+
"server": self.browser_config.proxy_server,
|
|
207
|
+
}
|
|
208
|
+
if self.browser_config.proxy_username:
|
|
209
|
+
proxy_config["username"] = self.browser_config.proxy_username
|
|
210
|
+
if self.browser_config.proxy_password:
|
|
211
|
+
proxy_config["password"] = self.browser_config.proxy_password
|
|
212
|
+
|
|
213
|
+
# Initialize Camoufox with persistent context
|
|
214
|
+
try:
|
|
215
|
+
# Camoufox() returns a context manager, we need to use __enter__() to get the context
|
|
216
|
+
camoufox_manager = Camoufox(
|
|
217
|
+
persistent_context=True,
|
|
218
|
+
user_data_dir=user_data_dir,
|
|
219
|
+
headless=self.browser_config.headless,
|
|
220
|
+
humanize=self.browser_config.humanize if hasattr(self.browser_config, 'humanize') else True,
|
|
221
|
+
os=self.browser_config.os if hasattr(self.browser_config, 'os') else 'linux',
|
|
222
|
+
geoip=self.browser_config.geoip if hasattr(self.browser_config, 'geoip') else True,
|
|
223
|
+
proxy=proxy_config
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
# Enter the context manager to get the browser context
|
|
227
|
+
self.browser = camoufox_manager # Store manager for cleanup
|
|
228
|
+
self.browser_context = camoufox_manager.__enter__()
|
|
229
|
+
|
|
230
|
+
# Use existing page if available (avoid opening 2 tabs)
|
|
231
|
+
if self.browser_context.pages:
|
|
232
|
+
self.browser_page = self.browser_context.pages[0]
|
|
233
|
+
else:
|
|
234
|
+
self.browser_page = self.browser_context.new_page()
|
|
235
|
+
|
|
236
|
+
# Set up network interception
|
|
237
|
+
self.browser_page.on("response", self._handle_network_response)
|
|
238
|
+
|
|
239
|
+
except Exception as e:
|
|
240
|
+
raise exceptions.BrowserError(f"Failed to initialize Camoufox: {e}")
|
|
241
|
+
|
|
242
|
+
def _close_browser(self) -> None:
|
|
243
|
+
"""Close browser if open"""
|
|
244
|
+
if self.browser:
|
|
245
|
+
try:
|
|
246
|
+
# Exit the context manager
|
|
247
|
+
self.browser.__exit__(None, None, None)
|
|
248
|
+
except Exception:
|
|
249
|
+
pass
|
|
250
|
+
self.browser = None
|
|
251
|
+
self.browser_context = None
|
|
252
|
+
self.browser_page = None
|
|
253
|
+
|
|
254
|
+
def _handle_network_response(self, response) -> None:
|
|
255
|
+
"""
|
|
256
|
+
Handle network responses and cache Google Trends API data
|
|
257
|
+
|
|
258
|
+
Args:
|
|
259
|
+
response: Playwright response object
|
|
260
|
+
"""
|
|
261
|
+
url = response.url
|
|
262
|
+
|
|
263
|
+
# Only process Google Trends API responses
|
|
264
|
+
if '/trends/api/widgetdata/' not in url:
|
|
265
|
+
return
|
|
266
|
+
|
|
267
|
+
try:
|
|
268
|
+
# Get response body
|
|
269
|
+
body = response.body()
|
|
270
|
+
|
|
271
|
+
# Parse the response (remove Google's JSONP prefix - exactly 5 bytes)
|
|
272
|
+
if body.startswith(b")]}'\n"):
|
|
273
|
+
body = body[5:]
|
|
274
|
+
elif body.startswith(b")]}'"):
|
|
275
|
+
body = body[5:]
|
|
276
|
+
|
|
277
|
+
data = json.loads(body)
|
|
278
|
+
|
|
279
|
+
# Cache by URL pattern
|
|
280
|
+
if '/widgetdata/multiline' in url:
|
|
281
|
+
self.browser_responses_cache['interest_over_time'] = data
|
|
282
|
+
elif '/widgetdata/comparedgeo' in url:
|
|
283
|
+
self.browser_responses_cache['interest_by_region'] = data
|
|
284
|
+
elif '/widgetdata/relatedsearches' in url:
|
|
285
|
+
# keywordType is URL-encoded inside the req parameter
|
|
286
|
+
import urllib.parse
|
|
287
|
+
decoded_url = urllib.parse.unquote(url)
|
|
288
|
+
if 'keywordType":"ENTITY' in decoded_url:
|
|
289
|
+
self.browser_responses_cache['related_topics'] = data
|
|
290
|
+
elif 'keywordType":"QUERY' in decoded_url:
|
|
291
|
+
self.browser_responses_cache['related_queries'] = data
|
|
292
|
+
|
|
293
|
+
except Exception:
|
|
294
|
+
pass # Silently ignore parsing errors
|
|
295
|
+
|
|
296
|
+
def _capture_all_api_responses(self, keyword: str) -> None:
    """
    Navigate once and capture ALL API responses via network interception

    The response cache is cleared first, then the explore page for the
    keyword is loaded with a fixed ``today 1-m`` date range and ``hl=en-GB``.

    Args:
        keyword: Search keyword to use

    Raises:
        exceptions.BrowserError: If no browser page is available or the
            navigation fails (the underlying error is chained as the cause)
    """
    if not self.browser_page:
        raise exceptions.BrowserError("Browser not initialized")

    # Clear cache
    self.browser_responses_cache.clear()

    # Build URL
    import urllib.parse

    encoded_keyword = urllib.parse.quote(keyword)
    url = f"https://trends.google.com/trends/explore?date=today%201-m&q={encoded_keyword}&hl=en-GB"

    try:
        # Navigate and wait for network idle
        self.browser_page.goto(url, wait_until='networkidle', timeout=60000)

        # Give extra time for any delayed API calls
        import time

        time.sleep(2)

    except Exception as e:
        # Chain the original error so the real navigation failure stays visible
        raise exceptions.BrowserError(f"Failed to navigate to Google Trends: {e}") from e
|
|
324
|
+
|
|
325
|
+
def _parse_api_response(self, response_text: str) -> Dict:
|
|
326
|
+
"""
|
|
327
|
+
Parse API response from Google Trends
|
|
328
|
+
|
|
329
|
+
Google's API responses may contain garbage prefix: ")]}',\n"
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
response_text: Raw response text
|
|
333
|
+
|
|
334
|
+
Returns:
|
|
335
|
+
Parsed JSON data
|
|
336
|
+
"""
|
|
337
|
+
# Remove garbage prefix if present
|
|
338
|
+
if response_text.startswith(")]}',\n"):
|
|
339
|
+
response_text = response_text[6:]
|
|
340
|
+
elif response_text.startswith(")]}',"):
|
|
341
|
+
response_text = response_text[5:]
|
|
342
|
+
elif response_text.startswith(")]},"):
|
|
343
|
+
response_text = response_text[5:]
|
|
344
|
+
elif response_text.startswith(")]}'"):
|
|
345
|
+
response_text = response_text[4:]
|
|
346
|
+
|
|
347
|
+
try:
|
|
348
|
+
return json.loads(response_text)
|
|
349
|
+
except json.JSONDecodeError as e:
|
|
350
|
+
raise exceptions.ResponseError(f"Failed to parse API response: {e}")
|
|
351
|
+
|
|
352
|
+
def _parse_multiline_response(self, data: Dict) -> pd.DataFrame:
|
|
353
|
+
"""
|
|
354
|
+
Parse multiline API response (interest over time)
|
|
355
|
+
|
|
356
|
+
Args:
|
|
357
|
+
data: Parsed JSON response from multiline API
|
|
358
|
+
|
|
359
|
+
Returns:
|
|
360
|
+
DataFrame with date index and keyword columns
|
|
361
|
+
"""
|
|
362
|
+
try:
|
|
363
|
+
timeline_data = data.get('default', {}).get('timelineData', [])
|
|
364
|
+
|
|
365
|
+
if not timeline_data:
|
|
366
|
+
return pd.DataFrame()
|
|
367
|
+
|
|
368
|
+
df = pd.DataFrame(timeline_data)
|
|
369
|
+
|
|
370
|
+
# Convert timestamps to datetime
|
|
371
|
+
df["date"] = pd.to_datetime(df["time"].astype("float64"), unit="s")
|
|
372
|
+
df = df.set_index("date").sort_index()
|
|
373
|
+
|
|
374
|
+
# Parse values
|
|
375
|
+
result_df = df["value"].apply(
|
|
376
|
+
lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
# Name columns with keywords
|
|
380
|
+
for idx, kw in enumerate(self.kw_list):
|
|
381
|
+
result_df.insert(len(result_df.columns), kw, result_df[idx].astype("int"))
|
|
382
|
+
del result_df[idx]
|
|
383
|
+
|
|
384
|
+
# Add isPartial column
|
|
385
|
+
if "isPartial" in df:
|
|
386
|
+
df["isPartial"] = df["isPartial"].where(df["isPartial"].notna(), False)
|
|
387
|
+
is_partial_df = df["isPartial"].apply(
|
|
388
|
+
lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
|
|
389
|
+
)
|
|
390
|
+
is_partial_df.columns = ["isPartial"]
|
|
391
|
+
is_partial_df["isPartial"] = is_partial_df["isPartial"] == "True"
|
|
392
|
+
final_df = pd.concat([result_df, is_partial_df], axis=1)
|
|
393
|
+
else:
|
|
394
|
+
final_df = result_df
|
|
395
|
+
final_df["isPartial"] = False
|
|
396
|
+
|
|
397
|
+
return final_df
|
|
398
|
+
|
|
399
|
+
except Exception as e:
|
|
400
|
+
raise exceptions.ResponseError(f"Failed to parse multiline response: {e}")
|
|
401
|
+
|
|
402
|
+
def _parse_comparedgeo_response(self, data: Dict, inc_geo_code: bool = False) -> pd.DataFrame:
|
|
403
|
+
"""
|
|
404
|
+
Parse comparedgeo API response (interest by region)
|
|
405
|
+
|
|
406
|
+
Args:
|
|
407
|
+
data: Parsed JSON response from comparedgeo API
|
|
408
|
+
inc_geo_code: Include geographic codes in output
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
DataFrame with geographic distribution
|
|
412
|
+
"""
|
|
413
|
+
try:
|
|
414
|
+
geo_data = data.get('default', {}).get('geoMapData', [])
|
|
415
|
+
|
|
416
|
+
if not geo_data:
|
|
417
|
+
return pd.DataFrame()
|
|
418
|
+
|
|
419
|
+
df = pd.DataFrame(geo_data)
|
|
420
|
+
|
|
421
|
+
# Determine geo column name
|
|
422
|
+
geo_column = "geoCode" if "geoCode" in df.columns else "coordinates"
|
|
423
|
+
columns = ["geoName", geo_column, "value"]
|
|
424
|
+
df = df[columns].set_index("geoName").sort_index()
|
|
425
|
+
|
|
426
|
+
# Parse values
|
|
427
|
+
result_df = df["value"].apply(
|
|
428
|
+
lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
# Name columns with keywords
|
|
432
|
+
for idx, kw in enumerate(self.kw_list):
|
|
433
|
+
result_df.insert(len(result_df.columns), kw, result_df[idx].astype("int"))
|
|
434
|
+
del result_df[idx]
|
|
435
|
+
|
|
436
|
+
# Add geo code if requested
|
|
437
|
+
if inc_geo_code and geo_column in df.columns:
|
|
438
|
+
result_df[geo_column] = df[geo_column]
|
|
439
|
+
|
|
440
|
+
return result_df
|
|
441
|
+
|
|
442
|
+
except Exception as e:
|
|
443
|
+
raise exceptions.ResponseError(f"Failed to parse comparedgeo response: {e}")
|
|
444
|
+
|
|
445
|
+
def _parse_relatedsearches_response(self, data: Dict) -> Dict[str, Optional[pd.DataFrame]]:
|
|
446
|
+
"""
|
|
447
|
+
Parse relatedsearches API response (related queries or topics)
|
|
448
|
+
|
|
449
|
+
Args:
|
|
450
|
+
data: Parsed JSON response from relatedsearches API
|
|
451
|
+
|
|
452
|
+
Returns:
|
|
453
|
+
Dictionary with 'top' and 'rising' DataFrames
|
|
454
|
+
"""
|
|
455
|
+
try:
|
|
456
|
+
ranked_list = data.get('default', {}).get('rankedList', [])
|
|
457
|
+
|
|
458
|
+
# Parse top
|
|
459
|
+
try:
|
|
460
|
+
top_data = ranked_list[0]['rankedKeyword']
|
|
461
|
+
if 'topic' in str(top_data[0]) if top_data else False:
|
|
462
|
+
# Topics format
|
|
463
|
+
df_top = pd.json_normalize(top_data, sep="_")
|
|
464
|
+
else:
|
|
465
|
+
# Queries format
|
|
466
|
+
df_top = pd.DataFrame(top_data)
|
|
467
|
+
df_top = df_top[["query", "value"]] if "query" in df_top.columns else df_top
|
|
468
|
+
except (KeyError, IndexError):
|
|
469
|
+
df_top = None
|
|
470
|
+
|
|
471
|
+
# Parse rising
|
|
472
|
+
try:
|
|
473
|
+
rising_data = ranked_list[1]['rankedKeyword']
|
|
474
|
+
if 'topic' in str(rising_data[0]) if rising_data else False:
|
|
475
|
+
# Topics format
|
|
476
|
+
df_rising = pd.json_normalize(rising_data, sep="_")
|
|
477
|
+
else:
|
|
478
|
+
# Queries format
|
|
479
|
+
df_rising = pd.DataFrame(rising_data)
|
|
480
|
+
df_rising = df_rising[["query", "value"]] if "query" in df_rising.columns else df_rising
|
|
481
|
+
except (KeyError, IndexError):
|
|
482
|
+
df_rising = None
|
|
483
|
+
|
|
484
|
+
return {"top": df_top, "rising": df_rising}
|
|
485
|
+
|
|
486
|
+
except Exception as e:
|
|
487
|
+
raise exceptions.ResponseError(f"Failed to parse relatedsearches response: {e}")
|
|
134
488
|
|
|
135
489
|
def _get_google_cookie(self) -> Dict[str, str]:
|
|
136
490
|
"""
|
|
@@ -334,8 +688,9 @@ class TrendReq:
|
|
|
334
688
|
# Convert req to JSON string (required by Google's API)
|
|
335
689
|
self.token_payload["req"] = json.dumps(self.token_payload["req"])
|
|
336
690
|
|
|
337
|
-
# Get tokens from Google
|
|
338
|
-
self.
|
|
691
|
+
# Get tokens from Google (skip in browser mode)
|
|
692
|
+
if not self.browser_mode:
|
|
693
|
+
self._get_tokens()
|
|
339
694
|
|
|
340
695
|
def _get_tokens(self) -> None:
|
|
341
696
|
"""
|
|
@@ -384,6 +739,35 @@ class TrendReq:
|
|
|
384
739
|
>>> df = pytrends.interest_over_time()
|
|
385
740
|
>>> print(df.head())
|
|
386
741
|
"""
|
|
742
|
+
# Browser mode: capture multiline API response
|
|
743
|
+
if self.browser_mode:
|
|
744
|
+
if len(self.kw_list) != 1:
|
|
745
|
+
raise exceptions.InvalidParameterError(
|
|
746
|
+
"Browser mode only supports 1 keyword. You provided: "
|
|
747
|
+
+ str(len(self.kw_list))
|
|
748
|
+
)
|
|
749
|
+
|
|
750
|
+
keyword = self.kw_list[0]
|
|
751
|
+
|
|
752
|
+
# Capture all responses if not already cached
|
|
753
|
+
if not self.browser_responses_cache:
|
|
754
|
+
self._capture_all_api_responses(keyword)
|
|
755
|
+
|
|
756
|
+
# Get cached response
|
|
757
|
+
response_data = self.browser_responses_cache.get('interest_over_time')
|
|
758
|
+
|
|
759
|
+
if not response_data:
|
|
760
|
+
# Try one more navigation if cache is empty
|
|
761
|
+
self._capture_all_api_responses(keyword)
|
|
762
|
+
response_data = self.browser_responses_cache.get('interest_over_time')
|
|
763
|
+
|
|
764
|
+
if not response_data:
|
|
765
|
+
raise exceptions.ResponseError("Failed to capture interest_over_time API response")
|
|
766
|
+
|
|
767
|
+
# Parse browser response to DataFrame
|
|
768
|
+
return self._parse_multiline_response(response_data)
|
|
769
|
+
|
|
770
|
+
# Standard mode: use widgets
|
|
387
771
|
if not self.interest_over_time_widget:
|
|
388
772
|
raise exceptions.ResponseError(
|
|
389
773
|
"No interest over time widget available. Call build_payload() first."
|
|
@@ -469,6 +853,28 @@ class TrendReq:
|
|
|
469
853
|
>>> df = pytrends.interest_by_region(resolution='REGION')
|
|
470
854
|
>>> print(df.head())
|
|
471
855
|
"""
|
|
856
|
+
# Browser mode: capture comparedgeo API response
|
|
857
|
+
if self.browser_mode:
|
|
858
|
+
if len(self.kw_list) != 1:
|
|
859
|
+
raise exceptions.InvalidParameterError(
|
|
860
|
+
"Browser mode only supports 1 keyword"
|
|
861
|
+
)
|
|
862
|
+
|
|
863
|
+
keyword = self.kw_list[0]
|
|
864
|
+
|
|
865
|
+
# Capture all responses if not already cached
|
|
866
|
+
if not self.browser_responses_cache:
|
|
867
|
+
self._capture_all_api_responses(keyword)
|
|
868
|
+
|
|
869
|
+
# Get cached response
|
|
870
|
+
response_data = self.browser_responses_cache.get('interest_by_region')
|
|
871
|
+
|
|
872
|
+
if not response_data:
|
|
873
|
+
raise exceptions.ResponseError("Failed to capture interest_by_region API response")
|
|
874
|
+
|
|
875
|
+
return self._parse_comparedgeo_response(response_data, inc_geo_code)
|
|
876
|
+
|
|
877
|
+
# Standard mode
|
|
472
878
|
if not self.interest_by_region_widget:
|
|
473
879
|
raise exceptions.ResponseError(
|
|
474
880
|
"No interest by region widget available. Call build_payload() first."
|
|
@@ -536,6 +942,28 @@ class TrendReq:
|
|
|
536
942
|
>>> topics = pytrends.related_topics()
|
|
537
943
|
>>> print(topics['Python']['top'].head())
|
|
538
944
|
"""
|
|
945
|
+
# Browser mode: capture relatedsearches ENTITY API response
|
|
946
|
+
if self.browser_mode:
|
|
947
|
+
if len(self.kw_list) != 1:
|
|
948
|
+
raise exceptions.InvalidParameterError(
|
|
949
|
+
"Browser mode only supports 1 keyword"
|
|
950
|
+
)
|
|
951
|
+
|
|
952
|
+
keyword = self.kw_list[0]
|
|
953
|
+
|
|
954
|
+
# Capture all responses if not already cached
|
|
955
|
+
if not self.browser_responses_cache:
|
|
956
|
+
self._capture_all_api_responses(keyword)
|
|
957
|
+
|
|
958
|
+
# Get cached response
|
|
959
|
+
response_data = self.browser_responses_cache.get('related_topics')
|
|
960
|
+
|
|
961
|
+
if not response_data:
|
|
962
|
+
raise exceptions.ResponseError("Failed to capture related_topics API response")
|
|
963
|
+
|
|
964
|
+
return {keyword: self._parse_relatedsearches_response(response_data)}
|
|
965
|
+
|
|
966
|
+
# Standard mode
|
|
539
967
|
if not self.related_topics_widget_list:
|
|
540
968
|
raise exceptions.ResponseError(
|
|
541
969
|
"No related topics widgets available. Call build_payload() first."
|
|
@@ -599,6 +1027,28 @@ class TrendReq:
|
|
|
599
1027
|
>>> queries = pytrends.related_queries()
|
|
600
1028
|
>>> print(queries['Python']['top'].head())
|
|
601
1029
|
"""
|
|
1030
|
+
# Browser mode: capture relatedsearches QUERY API response
|
|
1031
|
+
if self.browser_mode:
|
|
1032
|
+
if len(self.kw_list) != 1:
|
|
1033
|
+
raise exceptions.InvalidParameterError(
|
|
1034
|
+
"Browser mode only supports 1 keyword"
|
|
1035
|
+
)
|
|
1036
|
+
|
|
1037
|
+
keyword = self.kw_list[0]
|
|
1038
|
+
|
|
1039
|
+
# Capture all responses if not already cached
|
|
1040
|
+
if not self.browser_responses_cache:
|
|
1041
|
+
self._capture_all_api_responses(keyword)
|
|
1042
|
+
|
|
1043
|
+
# Get cached response
|
|
1044
|
+
response_data = self.browser_responses_cache.get('related_queries')
|
|
1045
|
+
|
|
1046
|
+
if not response_data:
|
|
1047
|
+
raise exceptions.ResponseError("Failed to capture related_queries API response")
|
|
1048
|
+
|
|
1049
|
+
return {keyword: self._parse_relatedsearches_response(response_data)}
|
|
1050
|
+
|
|
1051
|
+
# Standard mode
|
|
602
1052
|
if not self.related_queries_widget_list:
|
|
603
1053
|
raise exceptions.ResponseError(
|
|
604
1054
|
"No related queries widgets available. Call build_payload() first."
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pytrends-modern
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: Modern Google Trends API - Combining the best of pytrends, with RSS feeds, Selenium scraping, and enhanced features
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Modern Google Trends API - Combining the best of pytrends, with RSS feeds, Selenium scraping, DrissionPage browser automation, and enhanced features
|
|
5
5
|
Author: pytrends-modern contributors
|
|
6
6
|
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/yiromo/pytrends-modern
|
|
@@ -28,6 +28,9 @@ Requires-Dist: lxml>=4.9.0
|
|
|
28
28
|
Provides-Extra: selenium
|
|
29
29
|
Requires-Dist: selenium>=4.0.0; extra == "selenium"
|
|
30
30
|
Requires-Dist: webdriver-manager>=4.0.0; extra == "selenium"
|
|
31
|
+
Provides-Extra: browser
|
|
32
|
+
Requires-Dist: camoufox[geoip]>=0.4.11; extra == "browser"
|
|
33
|
+
Requires-Dist: browserforge[all]>=1.0.0; extra == "browser"
|
|
31
34
|
Provides-Extra: cli
|
|
32
35
|
Requires-Dist: click>=8.0.0; extra == "cli"
|
|
33
36
|
Requires-Dist: rich>=13.0.0; extra == "cli"
|
|
@@ -74,6 +77,9 @@ pytrends-modern is a **next-generation** Google Trends library that combines:
|
|
|
74
77
|
# Basic installation
|
|
75
78
|
pip install pytrends-modern
|
|
76
79
|
|
|
80
|
+
# With browser mode (Camoufox for bypassing rate limits)
|
|
81
|
+
pip install pytrends-modern[browser]
|
|
82
|
+
|
|
77
83
|
# With Selenium support (for advanced scraping)
|
|
78
84
|
pip install pytrends-modern[selenium]
|
|
79
85
|
|
|
@@ -112,6 +118,71 @@ related = pytrends.related_queries()
|
|
|
112
118
|
print(related['Python']['top'])
|
|
113
119
|
```
|
|
114
120
|
|
|
121
|
+
### 🦊 Browser Mode (Camoufox) - Bypass Rate Limits
|
|
122
|
+
|
|
123
|
+
**NEW!** Use Camoufox with advanced fingerprinting to bypass Google's rate limits by using your Google account:
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from pytrends_modern import TrendReq, BrowserConfig
|
|
127
|
+
from pytrends_modern.camoufox_setup import setup_profile
|
|
128
|
+
|
|
129
|
+
# First-time setup: Configure Google account login
|
|
130
|
+
setup_profile() # Opens browser - log in to Google once
|
|
131
|
+
|
|
132
|
+
# Use browser mode (persistent login, no rate limits!)
|
|
133
|
+
config = BrowserConfig(headless=False)
|
|
134
|
+
pytrends = TrendReq(browser_config=config)
|
|
135
|
+
|
|
136
|
+
# Works like normal API
|
|
137
|
+
pytrends.kw_list = ['Python']
|
|
138
|
+
df = pytrends.interest_over_time()
|
|
139
|
+
print(df.head())
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Browser Mode Limitations:**
|
|
143
|
+
- ⚠️ Only 1 keyword at a time (no comparisons)
|
|
144
|
+
- ⚠️ Only 'today 1-m' timeframe
|
|
145
|
+
- ⚠️ Only WORLDWIDE region
|
|
146
|
+
- ✅ No rate limits (uses your Google account)
|
|
147
|
+
- ✅ Perfect anti-detection with Camoufox fingerprinting
|
|
148
|
+
|
|
149
|
+
**Setup from command line:**
|
|
150
|
+
```bash
|
|
151
|
+
# Check profile status
|
|
152
|
+
python -m pytrends_modern.camoufox_setup status
|
|
153
|
+
|
|
154
|
+
# Run setup (opens browser for Google login)
|
|
155
|
+
python -m pytrends_modern.camoufox_setup
|
|
156
|
+
|
|
157
|
+
# Export profile for Docker/other machines
|
|
158
|
+
python -m pytrends_modern.camoufox_setup export camoufox-profile.tar.gz
|
|
159
|
+
|
|
160
|
+
# Import profile on another machine
|
|
161
|
+
python -m pytrends_modern.camoufox_setup import camoufox-profile.tar.gz
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
**Docker Usage:**
|
|
165
|
+
|
|
166
|
+
Yes! You can export your profile and use it in Docker containers:
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
# 1. Export profile locally
|
|
170
|
+
python -m pytrends_modern.camoufox_setup export profile.tar.gz
|
|
171
|
+
|
|
172
|
+
# 2. Use in Dockerfile
|
|
173
|
+
COPY profile.tar.gz /tmp/
|
|
174
|
+
RUN mkdir -p /root/.config && \
|
|
175
|
+
cd /root/.config && \
|
|
176
|
+
tar -xzf /tmp/profile.tar.gz
|
|
177
|
+
|
|
178
|
+
# 3. Use headless mode in container
|
|
179
|
+
config = BrowserConfig(headless=True)
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
See `Dockerfile.example` for complete Docker setup.
|
|
183
|
+
|
|
184
|
+
⚠️ **Security**: Profile contains Google session - keep secure, don't commit to git!
|
|
185
|
+
|
|
115
186
|
### RSS Feed (Fast Real-Time Data)
|
|
116
187
|
|
|
117
188
|
```python
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
pytrends_modern/__init__.py,sha256=oMZFQnXNdW3SPnAvBQ-dfUghoLOD4oMpmqd6Gloxkvo,767
|
|
2
|
+
pytrends_modern/browser_config.py,sha256=aEznKEaAiJ1vFPYMeNc2wiyehijnleNBFoiZIXWQcRQ,3513
|
|
3
|
+
pytrends_modern/browser_config_camoufox.py,sha256=RADieQug_3-7-Pfdr_YNs929zIwaaWicyn9kAfbqh-E,2532
|
|
4
|
+
pytrends_modern/camoufox_setup.py,sha256=WkytaNt64Gro8wJUPXaoUPhesDRvz460UeHGzETR6ds,10534
|
|
5
|
+
pytrends_modern/cli.py,sha256=lwuXzFY5S7glSGL180dFkpnTcVsM7JfTzJ2vLKs8TLA,12784
|
|
6
|
+
pytrends_modern/config.py,sha256=zPCMgdAFwcrQPMYO87dLAirkxnvpvsTx9DkWZGkl8zk,5198
|
|
7
|
+
pytrends_modern/exceptions.py,sha256=3WWDiYha1Tj2Hn2TNQfAPUncdGHIJbqMLFRQjLLXm3k,1712
|
|
8
|
+
pytrends_modern/proxy_extension.py,sha256=j-Cseb5G627q2G7dRdlkhqzGUG9dhmoTi1UK5wwF3u4,2214
|
|
9
|
+
pytrends_modern/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
pytrends_modern/request.py,sha256=4_cBWmcMmzPiZE6OuCBM7me65RlHKLtOPCv6kRrQtas,46606
|
|
11
|
+
pytrends_modern/rss.py,sha256=6Qq8MsVJu9WtE5UcYlUze7EexT81Mq4af7pPFOMcDS0,11414
|
|
12
|
+
pytrends_modern/scraper.py,sha256=x1xbFjUs7ULOBlIWLZ90G6WXJwB2EYuwUo0dWmHAjEQ,9706
|
|
13
|
+
pytrends_modern/utils.py,sha256=xPf4nz4c8Mn-737PZTe6o3HChjZvIR8brIftNg0-IFE,6875
|
|
14
|
+
pytrends_modern-0.2.0.dist-info/licenses/LICENSE,sha256=4K_FiN4IB1h5rffiOC8s5Tpxiv161v0eNIQJMDbvC0o,1469
|
|
15
|
+
pytrends_modern-0.2.0.dist-info/METADATA,sha256=HruSpENf_WgzrxTaAR3HHMHPpC83VxF9Rzpw-7_aDp0,14302
|
|
16
|
+
pytrends_modern-0.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
pytrends_modern-0.2.0.dist-info/entry_points.txt,sha256=1ilOUXV2wt8NqQp2ViD-obi9k8iQANEa3eU-7S3jTgs,61
|
|
18
|
+
pytrends_modern-0.2.0.dist-info/top_level.txt,sha256=bbuIEWVfkaA-sBTKf-Dzau5Ll2zlHs21o0zWtCmQG50,16
|
|
19
|
+
pytrends_modern-0.2.0.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
pytrends_modern/__init__.py,sha256=rzZ0VT5yvZw-lJvoF4dl9GLTXWqv0A3TayTQmIBnxOc,615
|
|
2
|
-
pytrends_modern/cli.py,sha256=lwuXzFY5S7glSGL180dFkpnTcVsM7JfTzJ2vLKs8TLA,12784
|
|
3
|
-
pytrends_modern/config.py,sha256=zPCMgdAFwcrQPMYO87dLAirkxnvpvsTx9DkWZGkl8zk,5198
|
|
4
|
-
pytrends_modern/exceptions.py,sha256=3WWDiYha1Tj2Hn2TNQfAPUncdGHIJbqMLFRQjLLXm3k,1712
|
|
5
|
-
pytrends_modern/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
pytrends_modern/request.py,sha256=V_7R_Ywk_QhgW--sx3s2ScQpmvuHpXfU2LCDKZJ7GcU,28211
|
|
7
|
-
pytrends_modern/rss.py,sha256=6Qq8MsVJu9WtE5UcYlUze7EexT81Mq4af7pPFOMcDS0,11414
|
|
8
|
-
pytrends_modern/scraper.py,sha256=x1xbFjUs7ULOBlIWLZ90G6WXJwB2EYuwUo0dWmHAjEQ,9706
|
|
9
|
-
pytrends_modern/utils.py,sha256=xPf4nz4c8Mn-737PZTe6o3HChjZvIR8brIftNg0-IFE,6875
|
|
10
|
-
pytrends_modern-0.1.2.dist-info/licenses/LICENSE,sha256=4K_FiN4IB1h5rffiOC8s5Tpxiv161v0eNIQJMDbvC0o,1469
|
|
11
|
-
pytrends_modern-0.1.2.dist-info/METADATA,sha256=T5QU-DUrLhfmr-kzRv0oF6LJG6nIdu-JFCrbdm4MD5w,12156
|
|
12
|
-
pytrends_modern-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
13
|
-
pytrends_modern-0.1.2.dist-info/entry_points.txt,sha256=1ilOUXV2wt8NqQp2ViD-obi9k8iQANEa3eU-7S3jTgs,61
|
|
14
|
-
pytrends_modern-0.1.2.dist-info/top_level.txt,sha256=bbuIEWVfkaA-sBTKf-Dzau5Ll2zlHs21o0zWtCmQG50,16
|
|
15
|
-
pytrends_modern-0.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|