crawlforge-mcp-server 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +315 -0
- package/LICENSE +21 -0
- package/README.md +181 -0
- package/package.json +115 -0
- package/server.js +1963 -0
- package/setup.js +112 -0
- package/src/constants/config.js +615 -0
- package/src/core/ActionExecutor.js +1104 -0
- package/src/core/AlertNotificationSystem.js +601 -0
- package/src/core/AuthManager.js +315 -0
- package/src/core/ChangeTracker.js +2306 -0
- package/src/core/JobManager.js +687 -0
- package/src/core/LLMsTxtAnalyzer.js +753 -0
- package/src/core/LocalizationManager.js +1615 -0
- package/src/core/PerformanceManager.js +828 -0
- package/src/core/ResearchOrchestrator.js +1327 -0
- package/src/core/SnapshotManager.js +1037 -0
- package/src/core/StealthBrowserManager.js +1795 -0
- package/src/core/WebhookDispatcher.js +745 -0
- package/src/core/analysis/ContentAnalyzer.js +749 -0
- package/src/core/analysis/LinkAnalyzer.js +972 -0
- package/src/core/cache/CacheManager.js +821 -0
- package/src/core/connections/ConnectionPool.js +553 -0
- package/src/core/crawlers/BFSCrawler.js +845 -0
- package/src/core/integrations/PerformanceIntegration.js +377 -0
- package/src/core/llm/AnthropicProvider.js +135 -0
- package/src/core/llm/LLMManager.js +415 -0
- package/src/core/llm/LLMProvider.js +97 -0
- package/src/core/llm/OpenAIProvider.js +127 -0
- package/src/core/processing/BrowserProcessor.js +986 -0
- package/src/core/processing/ContentProcessor.js +505 -0
- package/src/core/processing/PDFProcessor.js +448 -0
- package/src/core/processing/StreamProcessor.js +673 -0
- package/src/core/queue/QueueManager.js +98 -0
- package/src/core/workers/WorkerPool.js +585 -0
- package/src/core/workers/worker.js +743 -0
- package/src/monitoring/healthCheck.js +600 -0
- package/src/monitoring/metrics.js +761 -0
- package/src/optimization/wave3-optimizations.js +932 -0
- package/src/security/security-patches.js +120 -0
- package/src/security/security-tests.js +355 -0
- package/src/security/wave3-security.js +652 -0
- package/src/tools/advanced/BatchScrapeTool.js +1089 -0
- package/src/tools/advanced/ScrapeWithActionsTool.js +669 -0
- package/src/tools/crawl/crawlDeep.js +449 -0
- package/src/tools/crawl/mapSite.js +400 -0
- package/src/tools/extract/analyzeContent.js +624 -0
- package/src/tools/extract/extractContent.js +329 -0
- package/src/tools/extract/processDocument.js +503 -0
- package/src/tools/extract/summarizeContent.js +376 -0
- package/src/tools/llmstxt/generateLLMsTxt.js +570 -0
- package/src/tools/research/deepResearch.js +706 -0
- package/src/tools/search/adapters/duckduckgoSearch.js +398 -0
- package/src/tools/search/adapters/googleSearch.js +236 -0
- package/src/tools/search/adapters/searchProviderFactory.js +96 -0
- package/src/tools/search/queryExpander.js +543 -0
- package/src/tools/search/ranking/ResultDeduplicator.js +676 -0
- package/src/tools/search/ranking/ResultRanker.js +497 -0
- package/src/tools/search/searchWeb.js +482 -0
- package/src/tools/tracking/trackChanges.js +1355 -0
- package/src/utils/CircuitBreaker.js +515 -0
- package/src/utils/ErrorHandlingConfig.js +342 -0
- package/src/utils/HumanBehaviorSimulator.js +569 -0
- package/src/utils/Logger.js +568 -0
- package/src/utils/MemoryMonitor.js +173 -0
- package/src/utils/RetryManager.js +386 -0
- package/src/utils/contentUtils.js +588 -0
- package/src/utils/domainFilter.js +612 -0
- package/src/utils/inputValidation.js +766 -0
- package/src/utils/rateLimiter.js +196 -0
- package/src/utils/robotsChecker.js +91 -0
- package/src/utils/securityMiddleware.js +416 -0
- package/src/utils/sitemapParser.js +678 -0
- package/src/utils/ssrfProtection.js +640 -0
- package/src/utils/urlNormalizer.js +168 -0
|
@@ -0,0 +1,1615 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LocalizationManager - Location/Language Settings Management
|
|
3
|
+
* Handles country-specific settings, browser locale emulation,
|
|
4
|
+
* timezone spoofing, and geo-blocked content handling
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { z } from 'zod';
|
|
8
|
+
import { EventEmitter } from 'events';
|
|
9
|
+
|
|
10
|
+
// Per-country defaults keyed by ISO 3166-1 alpha-2 country code.
// Row layout: code, timezone, currency, language, searchDomain, isRTL, proxyRegion, countryName.
const SUPPORTED_COUNTRIES = Object.fromEntries(
  [
    ['US', 'America/New_York', 'USD', 'en-US', 'google.com', false, 'us-east', 'United States'],
    ['GB', 'Europe/London', 'GBP', 'en-GB', 'google.co.uk', false, 'eu-west', 'United Kingdom'],
    ['DE', 'Europe/Berlin', 'EUR', 'de-DE', 'google.de', false, 'eu-central', 'Germany'],
    ['FR', 'Europe/Paris', 'EUR', 'fr-FR', 'google.fr', false, 'eu-west', 'France'],
    ['JP', 'Asia/Tokyo', 'JPY', 'ja-JP', 'google.co.jp', false, 'asia-pacific', 'Japan'],
    ['CN', 'Asia/Shanghai', 'CNY', 'zh-CN', 'baidu.com', false, 'asia-pacific', 'China'],
    ['AU', 'Australia/Sydney', 'AUD', 'en-AU', 'google.com.au', false, 'asia-pacific', 'Australia'],
    ['CA', 'America/Toronto', 'CAD', 'en-CA', 'google.ca', false, 'us-east', 'Canada'],
    ['IT', 'Europe/Rome', 'EUR', 'it-IT', 'google.it', false, 'eu-central', 'Italy'],
    ['ES', 'Europe/Madrid', 'EUR', 'es-ES', 'google.es', false, 'eu-west', 'Spain'],
    ['RU', 'Europe/Moscow', 'RUB', 'ru-RU', 'yandex.ru', false, 'eu-east', 'Russia'],
    ['BR', 'America/Sao_Paulo', 'BRL', 'pt-BR', 'google.com.br', false, 'south-america', 'Brazil'],
    ['IN', 'Asia/Kolkata', 'INR', 'hi-IN', 'google.co.in', false, 'asia-pacific', 'India'],
    ['KR', 'Asia/Seoul', 'KRW', 'ko-KR', 'google.co.kr', false, 'asia-pacific', 'South Korea'],
    ['MX', 'America/Mexico_City', 'MXN', 'es-MX', 'google.com.mx', false, 'north-america', 'Mexico'],
    ['NL', 'Europe/Amsterdam', 'EUR', 'nl-NL', 'google.nl', false, 'eu-west', 'Netherlands'],
    ['SE', 'Europe/Stockholm', 'SEK', 'sv-SE', 'google.se', false, 'eu-north', 'Sweden'],
    ['NO', 'Europe/Oslo', 'NOK', 'nb-NO', 'google.no', false, 'eu-north', 'Norway'],
    ['SA', 'Asia/Riyadh', 'SAR', 'ar-SA', 'google.com.sa', true, 'middle-east', 'Saudi Arabia'],
    ['AE', 'Asia/Dubai', 'AED', 'ar-AE', 'google.ae', true, 'middle-east', 'United Arab Emirates'],
    ['TR', 'Europe/Istanbul', 'TRY', 'tr-TR', 'google.com.tr', false, 'eu-east', 'Turkey'],
    ['IL', 'Asia/Jerusalem', 'ILS', 'he-IL', 'google.co.il', true, 'middle-east', 'Israel'],
    ['TH', 'Asia/Bangkok', 'THB', 'th-TH', 'google.co.th', false, 'asia-pacific', 'Thailand'],
    ['SG', 'Asia/Singapore', 'SGD', 'en-SG', 'google.com.sg', false, 'asia-pacific', 'Singapore'],
    ['PL', 'Europe/Warsaw', 'PLN', 'pl-PL', 'google.pl', false, 'eu-central', 'Poland'],
    ['ZA', 'Africa/Johannesburg', 'ZAR', 'en-ZA', 'google.co.za', false, 'africa', 'South Africa']
  ].map(([code, timezone, currency, language, searchDomain, isRTL, proxyRegion, countryName]) => [
    code,
    { timezone, currency, language, searchDomain, isRTL, proxyRegion, countryName }
  ])
);
|
|
39
|
+
|
|
40
|
+
// Accept-Language presets keyed by bare language code. Each preset leads with
// one regional tag, then the bare code at q=0.9; every language except English
// also lists English as a q=0.8 fallback.
const LANGUAGE_MAPPINGS = Object.fromEntries(
  [
    ['en', 'en-US'],
    ['es', 'es-ES'],
    ['fr', 'fr-FR'],
    ['de', 'de-DE'],
    ['it', 'it-IT'],
    ['pt', 'pt-BR'],
    ['ru', 'ru-RU'],
    ['ja', 'ja-JP'],
    ['ko', 'ko-KR'],
    ['zh', 'zh-CN'],
    ['hi', 'hi-IN'],
    ['ar', 'ar-SA'],
    ['he', 'he-IL'],
    ['tr', 'tr-TR'],
    ['th', 'th-TH'],
    ['pl', 'pl-PL'],
    ['nl', 'nl-NL'],
    ['sv', 'sv-SE'],
    ['nb', 'nb-NO']
  ].map(([code, primaryTag]) => [
    code,
    code === 'en'
      ? `${primaryTag},${code};q=0.9`
      : `${primaryTag},${code};q=0.9,en;q=0.8`
  ])
);
|
|
62
|
+
|
|
63
|
+
// Bare language codes that are rendered right-to-left.
const RTL_LANGUAGES = new Set([
  'ar',
  'he',
  'fa',
  'ur',
  'ku',
  'dv'
]);
|
|
65
|
+
|
|
66
|
+
// Proxy catalogue: one endpoint per region plus ordered fallback tactics per
// failure category. Every endpoint is an example.com host on port 8080.
const PROXY_PROVIDERS = {
  regions: Object.fromEntries(
    [
      'us-east',
      'us-west',
      'eu-west',
      'eu-central',
      'eu-north',
      'eu-east',
      'asia-pacific',
      'middle-east',
      'south-america',
      'north-america',
      'africa'
    ].map((region) => [region, { endpoint: `proxy-${region}.example.com`, port: 8080 }])
  ),
  fallbackStrategies: {
    'geo-blocked': ['rotate-proxy', 'change-user-agent', 'delay-request'],
    'rate-limited': ['change-proxy', 'exponential-backoff'],
    'detection': ['rotate-fingerprint', 'change-proxy', 'human-delay']
  }
};
|
|
87
|
+
|
|
88
|
+
// Translation back-ends. A provider is flagged enabled only when its
// credential (or, for LibreTranslate, its URL) is present in the environment
// at the time this module is evaluated.
const TRANSLATION_SERVICES = ((env) => ({
  google: {
    enabled: Boolean(env.GOOGLE_TRANSLATE_API_KEY),
    apiKey: env.GOOGLE_TRANSLATE_API_KEY,
    endpoint: 'https://translation.googleapis.com/language/translate/v2'
  },
  azure: {
    enabled: Boolean(env.AZURE_TRANSLATE_KEY),
    key: env.AZURE_TRANSLATE_KEY,
    // An unset or empty region falls back to 'global'.
    region: env.AZURE_TRANSLATE_REGION || 'global',
    endpoint: 'https://api.cognitive.microsofttranslator.com/translate'
  },
  libre: {
    enabled: Boolean(env.LIBRE_TRANSLATE_URL),
    url: env.LIBRE_TRANSLATE_URL,
    apiKey: env.LIBRE_TRANSLATE_API_KEY
  }
}))(process.env);
|
|
107
|
+
|
|
108
|
+
// Validation schema for the options accepted when configuring a country.
// Every top-level field is optional. The nested groups are built as named
// local schemas first so the top-level shape reads as a short summary.
const LocalizationSchema = (() => {
  const geoLocation = z.object({
    latitude: z.number().min(-90).max(90),
    longitude: z.number().min(-180).max(180),
    accuracy: z.number().min(1).max(100).optional()
  });

  const proxyRotation = z.object({
    enabled: z.boolean().default(false),
    interval: z.number().default(300000), // 5 minutes
    strategy: z.enum(['round-robin', 'random', 'failover']).default('round-robin')
  });

  const proxyFallback = z.object({
    enabled: z.boolean().default(true),
    maxRetries: z.number().default(3),
    timeout: z.number().default(10000)
  });

  const proxySettings = z.object({
    enabled: z.boolean().default(false),
    region: z.string().optional(), // Proxy region preference
    type: z.enum(['http', 'https', 'socks4', 'socks5']).default('https'),
    server: z.string().optional(),
    port: z.number().optional(),
    username: z.string().optional(),
    password: z.string().optional(),
    rotation: proxyRotation.optional(),
    fallback: proxyFallback.optional()
  });

  const dnsSettings = z.object({
    enabled: z.boolean().default(false),
    servers: z.array(z.string()).optional(),
    preferredCountry: z.string().length(2).optional(),
    dnsOverHttps: z.boolean().default(false),
    customResolvers: z.record(z.string()).optional() // domain -> IP mappings
  });

  const translationSettings = z.object({
    enabled: z.boolean().default(false),
    targetLanguage: z.string().optional(),
    provider: z.enum(['google', 'azure', 'libre']).default('google'),
    autoDetect: z.boolean().default(true),
    preserveFormatting: z.boolean().default(true)
  });

  const culturalSettings = z.object({
    dateFormat: z.string().optional(),
    numberFormat: z.string().optional(),
    currencyDisplay: z.enum(['symbol', 'narrowSymbol', 'code', 'name']).default('symbol'),
    firstDayOfWeek: z.number().min(0).max(6).optional(),
    timeFormat: z.enum(['12h', '24h']).optional(),
    measurementSystem: z.enum(['metric', 'imperial']).optional()
  });

  return z.object({
    countryCode: z.string().length(2).optional(),
    language: z.string().optional(),
    timezone: z.string().optional(),
    currency: z.string().length(3).optional(),
    customHeaders: z.record(z.string()).optional(),
    userAgent: z.string().optional(),
    acceptLanguage: z.string().optional(),
    geoLocation: geoLocation.optional(),
    proxySettings: proxySettings.optional(),
    dnsSettings: dnsSettings.optional(),
    translationSettings: translationSettings.optional(),
    culturalSettings: culturalSettings.optional()
  });
})();
|
|
163
|
+
|
|
164
|
+
// Shape of the browser-locale record produced for an emulated browser.
// Core locale fields are required; the presentation hints are optional.
const BrowserLocaleSchema = (() => {
  const text = z.string();
  const optionalText = z.string().optional();

  return z.object({
    languages: z.array(text),
    timezone: text,
    locale: text,
    currency: text,
    dateFormat: text,
    numberFormat: text,
    firstDayOfWeek: z.number().min(0).max(6),
    isRTL: z.boolean().default(false),
    measurementSystem: optionalText,
    timeFormat: optionalText,
    currencyDisplay: optionalText,
    textDirection: z.enum(['ltr', 'rtl']).default('ltr')
  });
})();
|
|
178
|
+
|
|
179
|
+
export class LocalizationManager extends EventEmitter {
|
|
180
|
+
constructor(options = {}) {
|
|
181
|
+
super();
|
|
182
|
+
|
|
183
|
+
this.defaultSettings = {
|
|
184
|
+
countryCode: 'US',
|
|
185
|
+
language: 'en-US',
|
|
186
|
+
timezone: 'America/New_York',
|
|
187
|
+
currency: 'USD',
|
|
188
|
+
customHeaders: {},
|
|
189
|
+
geoBlockingBypass: true,
|
|
190
|
+
dynamicFingerprinting: true,
|
|
191
|
+
enableProxyRotation: false,
|
|
192
|
+
enableTranslation: false
|
|
193
|
+
};
|
|
194
|
+
|
|
195
|
+
this.currentSettings = { ...this.defaultSettings, ...options };
|
|
196
|
+
this.localeCache = new Map();
|
|
197
|
+
this.geoLocationCache = new Map();
|
|
198
|
+
this.timezoneCache = new Map();
|
|
199
|
+
this.proxyCache = new Map();
|
|
200
|
+
this.translationCache = new Map();
|
|
201
|
+
|
|
202
|
+
// Proxy management
|
|
203
|
+
this.proxyManager = {
|
|
204
|
+
activeProxies: new Map(),
|
|
205
|
+
currentProxy: null,
|
|
206
|
+
rotationIndex: 0,
|
|
207
|
+
lastRotation: 0,
|
|
208
|
+
failedProxies: new Set(),
|
|
209
|
+
healthChecks: new Map()
|
|
210
|
+
};
|
|
211
|
+
|
|
212
|
+
// Translation services
|
|
213
|
+
this.translationProviders = new Map();
|
|
214
|
+
this.languageDetector = null;
|
|
215
|
+
|
|
216
|
+
// Cultural browsing patterns
|
|
217
|
+
this.culturalPatterns = new Map();
|
|
218
|
+
|
|
219
|
+
// Statistics tracking
|
|
220
|
+
this.stats = {
|
|
221
|
+
localizationApplied: 0,
|
|
222
|
+
geoBlocksBypass: 0,
|
|
223
|
+
timezoneChanges: 0,
|
|
224
|
+
languageDetections: 0,
|
|
225
|
+
proxyUsage: 0,
|
|
226
|
+
proxyRotations: 0,
|
|
227
|
+
dnsOverrides: 0,
|
|
228
|
+
translationRequests: 0,
|
|
229
|
+
culturalAdaptations: 0,
|
|
230
|
+
lastUpdated: Date.now()
|
|
231
|
+
};
|
|
232
|
+
|
|
233
|
+
this.initialize();
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
async initialize() {
|
|
237
|
+
try {
|
|
238
|
+
// Pre-populate timezone mappings
|
|
239
|
+
await this.loadTimezoneData();
|
|
240
|
+
|
|
241
|
+
// Initialize geo-location data
|
|
242
|
+
await this.loadGeoLocationData();
|
|
243
|
+
|
|
244
|
+
// Initialize proxy configurations
|
|
245
|
+
await this.initializeProxySystem();
|
|
246
|
+
|
|
247
|
+
// Initialize translation services
|
|
248
|
+
await this.initializeTranslationServices();
|
|
249
|
+
|
|
250
|
+
// Load cultural browsing patterns
|
|
251
|
+
await this.loadCulturalPatterns();
|
|
252
|
+
|
|
253
|
+
// Setup periodic health checks
|
|
254
|
+
this.setupHealthChecks();
|
|
255
|
+
|
|
256
|
+
this.emit('initialized');
|
|
257
|
+
} catch (error) {
|
|
258
|
+
this.emit('error', {
|
|
259
|
+
type: 'initialization_failed',
|
|
260
|
+
error: error.message
|
|
261
|
+
});
|
|
262
|
+
throw error;
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
/**
|
|
267
|
+
* Configure localization settings for a specific country
|
|
268
|
+
* @param {string} countryCode - ISO 3166-1 alpha-2 country code
|
|
269
|
+
* @param {Object} options - Additional localization options
|
|
270
|
+
* @returns {Object} - Complete localization configuration
|
|
271
|
+
*/
|
|
272
|
+
async configureCountry(countryCode, options = {}) {
|
|
273
|
+
const validatedInput = LocalizationSchema.parse({
|
|
274
|
+
countryCode: countryCode.toUpperCase(),
|
|
275
|
+
...options
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
if (!SUPPORTED_COUNTRIES[validatedInput.countryCode]) {
|
|
279
|
+
throw new Error(`Unsupported country code: ${validatedInput.countryCode}`);
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
const countryData = SUPPORTED_COUNTRIES[validatedInput.countryCode];
|
|
283
|
+
|
|
284
|
+
// Merge country defaults with custom options
|
|
285
|
+
const localizationConfig = {
|
|
286
|
+
countryCode: validatedInput.countryCode,
|
|
287
|
+
language: validatedInput.language || countryData.language,
|
|
288
|
+
timezone: validatedInput.timezone || countryData.timezone,
|
|
289
|
+
currency: validatedInput.currency || countryData.currency,
|
|
290
|
+
searchDomain: countryData.searchDomain,
|
|
291
|
+
acceptLanguage: this.buildAcceptLanguageHeader(validatedInput.language || countryData.language),
|
|
292
|
+
customHeaders: validatedInput.customHeaders || {},
|
|
293
|
+
geoLocation: validatedInput.geoLocation,
|
|
294
|
+
proxySettings: validatedInput.proxySettings,
|
|
295
|
+
dnsSettings: validatedInput.dnsSettings
|
|
296
|
+
};
|
|
297
|
+
|
|
298
|
+
// Generate browser locale configuration
|
|
299
|
+
const browserLocale = await this.generateBrowserLocale(localizationConfig);
|
|
300
|
+
|
|
301
|
+
// Cache the configuration
|
|
302
|
+
const cacheKey = `${countryCode}-${JSON.stringify(options)}`;
|
|
303
|
+
this.localeCache.set(cacheKey, {
|
|
304
|
+
...localizationConfig,
|
|
305
|
+
browserLocale,
|
|
306
|
+
createdAt: Date.now()
|
|
307
|
+
});
|
|
308
|
+
|
|
309
|
+
this.currentSettings = localizationConfig;
|
|
310
|
+
this.stats.localizationApplied++;
|
|
311
|
+
|
|
312
|
+
this.emit('countryConfigured', countryCode, localizationConfig);
|
|
313
|
+
|
|
314
|
+
return {
|
|
315
|
+
...localizationConfig,
|
|
316
|
+
browserLocale
|
|
317
|
+
};
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
/**
|
|
321
|
+
* Generate browser locale emulation settings
|
|
322
|
+
* @param {Object} localizationConfig - Localization configuration
|
|
323
|
+
* @returns {Object} - Browser locale settings
|
|
324
|
+
*/
|
|
325
|
+
async generateBrowserLocale(localizationConfig) {
|
|
326
|
+
const { language, timezone, currency, countryCode } = localizationConfig;
|
|
327
|
+
|
|
328
|
+
// Extract language code from full locale
|
|
329
|
+
const langCode = language.split('-')[0];
|
|
330
|
+
const countryConfig = SUPPORTED_COUNTRIES[countryCode];
|
|
331
|
+
|
|
332
|
+
// Determine text direction and RTL support
|
|
333
|
+
const isRTL = RTL_LANGUAGES.has(langCode) || countryConfig?.isRTL;
|
|
334
|
+
|
|
335
|
+
// Generate comprehensive browser locale with RTL support
|
|
336
|
+
const browserLocale = {
|
|
337
|
+
languages: [language, langCode, 'en'],
|
|
338
|
+
timezone: timezone,
|
|
339
|
+
locale: language,
|
|
340
|
+
currency: currency,
|
|
341
|
+
dateFormat: this.getDateFormat(countryCode),
|
|
342
|
+
numberFormat: this.getNumberFormat(countryCode),
|
|
343
|
+
firstDayOfWeek: this.getFirstDayOfWeek(countryCode),
|
|
344
|
+
isRTL: isRTL,
|
|
345
|
+
textDirection: isRTL ? 'rtl' : 'ltr',
|
|
346
|
+
measurementSystem: this.getMeasurementSystem(countryCode),
|
|
347
|
+
timeFormat: this.getTimeFormat(countryCode),
|
|
348
|
+
currencyDisplay: this.getCurrencyDisplay(countryCode),
|
|
349
|
+
|
|
350
|
+
// Additional browser properties
|
|
351
|
+
screen: this.generateScreenProperties(countryCode),
|
|
352
|
+
navigator: await this.generateNavigatorProperties(localizationConfig),
|
|
353
|
+
intl: this.generateIntlProperties(language, countryCode),
|
|
354
|
+
|
|
355
|
+
// Cultural browsing behavior
|
|
356
|
+
culturalBehavior: this.getCulturalBehavior(countryCode)
|
|
357
|
+
};
|
|
358
|
+
|
|
359
|
+
return BrowserLocaleSchema.parse(browserLocale);
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
/**
|
|
363
|
+
* Apply localization to search query parameters
|
|
364
|
+
* @param {Object} searchParams - Original search parameters
|
|
365
|
+
* @param {string} countryCode - Target country code
|
|
366
|
+
* @returns {Object} - Localized search parameters
|
|
367
|
+
*/
|
|
368
|
+
async localizeSearchQuery(searchParams, countryCode = null) {
|
|
369
|
+
const targetCountry = countryCode || this.currentSettings.countryCode;
|
|
370
|
+
const config = await this.getLocalizationConfig(targetCountry);
|
|
371
|
+
|
|
372
|
+
const localizedParams = {
|
|
373
|
+
...searchParams,
|
|
374
|
+
|
|
375
|
+
// Apply language localization
|
|
376
|
+
lang: config.language.split('-')[0],
|
|
377
|
+
cr: `country${targetCountry}`, // Country restrict
|
|
378
|
+
lr: `lang_${config.language.split('-')[0]}`, // Language restrict
|
|
379
|
+
|
|
380
|
+
// Apply regional search domain
|
|
381
|
+
searchDomain: config.searchDomain,
|
|
382
|
+
|
|
383
|
+
// Add geo-location hints
|
|
384
|
+
uule: this.encodeLocationString(targetCountry),
|
|
385
|
+
|
|
386
|
+
// Custom headers for the request
|
|
387
|
+
headers: {
|
|
388
|
+
'Accept-Language': config.acceptLanguage,
|
|
389
|
+
'X-Forwarded-For': await this.getProxyIP(targetCountry),
|
|
390
|
+
...config.customHeaders,
|
|
391
|
+
...searchParams.headers
|
|
392
|
+
}
|
|
393
|
+
};
|
|
394
|
+
|
|
395
|
+
this.emit('searchQueryLocalized', targetCountry, localizedParams);
|
|
396
|
+
|
|
397
|
+
return localizedParams;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
/**
|
|
401
|
+
* Apply localization to browser context
|
|
402
|
+
* @param {Object} browserOptions - Browser configuration options
|
|
403
|
+
* @param {string} countryCode - Target country code
|
|
404
|
+
* @returns {Object} - Localized browser options
|
|
405
|
+
*/
|
|
406
|
+
async localizeBrowserContext(browserOptions, countryCode = null) {
|
|
407
|
+
const targetCountry = countryCode || this.currentSettings.countryCode;
|
|
408
|
+
const config = await this.getLocalizationConfig(targetCountry);
|
|
409
|
+
|
|
410
|
+
const localizedOptions = {
|
|
411
|
+
...browserOptions,
|
|
412
|
+
|
|
413
|
+
// Set locale and timezone
|
|
414
|
+
locale: config.language,
|
|
415
|
+
timezoneId: config.timezone,
|
|
416
|
+
|
|
417
|
+
// Configure geolocation
|
|
418
|
+
geolocation: config.geoLocation || await this.getDefaultGeoLocation(targetCountry),
|
|
419
|
+
|
|
420
|
+
// Set HTTP headers
|
|
421
|
+
extraHTTPHeaders: {
|
|
422
|
+
'Accept-Language': config.acceptLanguage,
|
|
423
|
+
'Accept-Encoding': 'gzip, deflate, br',
|
|
424
|
+
'Cache-Control': 'no-cache',
|
|
425
|
+
'DNT': '1',
|
|
426
|
+
...config.customHeaders,
|
|
427
|
+
...browserOptions.extraHTTPHeaders
|
|
428
|
+
},
|
|
429
|
+
|
|
430
|
+
// Configure user agent
|
|
431
|
+
userAgent: config.userAgent || this.generateUserAgent(targetCountry),
|
|
432
|
+
|
|
433
|
+
// Proxy configuration
|
|
434
|
+
proxy: config.proxySettings?.enabled ? {
|
|
435
|
+
server: `${config.proxySettings.server}:${config.proxySettings.port}`,
|
|
436
|
+
username: config.proxySettings.username,
|
|
437
|
+
password: config.proxySettings.password
|
|
438
|
+
} : undefined
|
|
439
|
+
};
|
|
440
|
+
|
|
441
|
+
// Apply browser fingerprinting adjustments
|
|
442
|
+
if (this.currentSettings.dynamicFingerprinting) {
|
|
443
|
+
localizedOptions.fingerprint = await this.generateFingerprint(targetCountry);
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
this.stats.localizationApplied++;
|
|
447
|
+
this.emit('browserContextLocalized', targetCountry, localizedOptions);
|
|
448
|
+
|
|
449
|
+
return localizedOptions;
|
|
450
|
+
}
|
|
451
|
+
|
|
452
|
+
/**
|
|
453
|
+
* Generate JavaScript code to inject timezone and locale overrides
|
|
454
|
+
* @param {string} countryCode - Target country code
|
|
455
|
+
* @returns {string} - JavaScript injection code
|
|
456
|
+
*/
|
|
457
|
+
async generateTimezoneSpoof(countryCode = null) {
|
|
458
|
+
const targetCountry = countryCode || this.currentSettings.countryCode;
|
|
459
|
+
const config = await this.getLocalizationConfig(targetCountry);
|
|
460
|
+
|
|
461
|
+
const timezoneOffset = this.getTimezoneOffset(config.timezone);
|
|
462
|
+
|
|
463
|
+
const injectionScript = `
|
|
464
|
+
// Override timezone and locale detection
|
|
465
|
+
(function() {
|
|
466
|
+
const originalDate = Date;
|
|
467
|
+
const targetTimezone = '${config.timezone}';
|
|
468
|
+
const timezoneOffset = ${timezoneOffset};
|
|
469
|
+
const targetLocale = '${config.language}';
|
|
470
|
+
|
|
471
|
+
// Override Date object
|
|
472
|
+
Date = function(...args) {
|
|
473
|
+
if (args.length === 0) {
|
|
474
|
+
const now = new originalDate();
|
|
475
|
+
now.setTime(now.getTime() + (now.getTimezoneOffset() + timezoneOffset) * 60000);
|
|
476
|
+
return now;
|
|
477
|
+
}
|
|
478
|
+
return new originalDate(...args);
|
|
479
|
+
};
|
|
480
|
+
|
|
481
|
+
// Copy static methods
|
|
482
|
+
Object.setPrototypeOf(Date, originalDate);
|
|
483
|
+
Object.getOwnPropertyNames(originalDate).forEach(name => {
|
|
484
|
+
if (name !== 'prototype' && name !== 'name' && name !== 'length') {
|
|
485
|
+
Date[name] = originalDate[name];
|
|
486
|
+
}
|
|
487
|
+
});
|
|
488
|
+
|
|
489
|
+
Date.prototype = originalDate.prototype;
|
|
490
|
+
|
|
491
|
+
// Override timezone methods
|
|
492
|
+
Date.prototype.getTimezoneOffset = function() {
|
|
493
|
+
return -timezoneOffset;
|
|
494
|
+
};
|
|
495
|
+
|
|
496
|
+
// Override Intl.DateTimeFormat
|
|
497
|
+
const originalIntlDateTimeFormat = Intl.DateTimeFormat;
|
|
498
|
+
Intl.DateTimeFormat = function(locales, options) {
|
|
499
|
+
return new originalIntlDateTimeFormat(targetLocale, {
|
|
500
|
+
...options,
|
|
501
|
+
timeZone: targetTimezone
|
|
502
|
+
});
|
|
503
|
+
};
|
|
504
|
+
|
|
505
|
+
// Override navigator.language
|
|
506
|
+
Object.defineProperty(navigator, 'language', {
|
|
507
|
+
get: () => targetLocale
|
|
508
|
+
});
|
|
509
|
+
|
|
510
|
+
Object.defineProperty(navigator, 'languages', {
|
|
511
|
+
get: () => ['${config.language}', '${config.language.split('-')[0]}', 'en']
|
|
512
|
+
});
|
|
513
|
+
|
|
514
|
+
// Override screen properties for regional differences
|
|
515
|
+
${this.generateScreenOverrides(targetCountry)}
|
|
516
|
+
|
|
517
|
+
console.debug('Timezone spoofing applied:', {
|
|
518
|
+
timezone: targetTimezone,
|
|
519
|
+
locale: targetLocale,
|
|
520
|
+
offset: timezoneOffset
|
|
521
|
+
});
|
|
522
|
+
})();
|
|
523
|
+
`;
|
|
524
|
+
|
|
525
|
+
this.stats.timezoneChanges++;
|
|
526
|
+
return injectionScript;
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
/**
|
|
530
|
+
* Detect and handle geo-blocked content
|
|
531
|
+
* @param {string} url - URL to check
|
|
532
|
+
* @param {Object} response - HTTP response object
|
|
533
|
+
* @returns {Object} - Analysis and bypass suggestions
|
|
534
|
+
*/
|
|
535
|
+
async handleGeoBlocking(url, response) {
|
|
536
|
+
const geoBlockingIndicators = [
|
|
537
|
+
/not available in your country/i,
|
|
538
|
+
/access denied/i,
|
|
539
|
+
/geo.?block/i,
|
|
540
|
+
/region.?restrict/i,
|
|
541
|
+
/unavailable in your location/i,
|
|
542
|
+
/vpn.?detect/i
|
|
543
|
+
];
|
|
544
|
+
|
|
545
|
+
const isGeoBlocked = response.status === 403 ||
|
|
546
|
+
response.status === 451 ||
|
|
547
|
+
geoBlockingIndicators.some(pattern =>
|
|
548
|
+
pattern.test(response.body || response.statusText || '')
|
|
549
|
+
);
|
|
550
|
+
|
|
551
|
+
if (!isGeoBlocked) {
|
|
552
|
+
return { blocked: false, url, status: response.status };
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
// Advanced bypass strategies with fallback options
|
|
556
|
+
const bypassStrategies = await this.generateBypassStrategies(url, response);
|
|
557
|
+
|
|
558
|
+
this.stats.geoBlocksBypass++;
|
|
559
|
+
this.emit('geoBlockingDetected', {
|
|
560
|
+
url,
|
|
561
|
+
status: response.status,
|
|
562
|
+
strategies: bypassStrategies
|
|
563
|
+
});
|
|
564
|
+
|
|
565
|
+
return {
|
|
566
|
+
blocked: true,
|
|
567
|
+
url,
|
|
568
|
+
status: response.status,
|
|
569
|
+
bypassStrategies,
|
|
570
|
+
autoBypass: this.currentSettings.geoBlockingBypass
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
/**
|
|
575
|
+
* Auto-detect appropriate localization from content
|
|
576
|
+
* @param {string} content - Web page content
|
|
577
|
+
* @param {string} url - Source URL
|
|
578
|
+
* @returns {Object} - Detected localization settings
|
|
579
|
+
*/
|
|
580
|
+
async autoDetectLocalization(content, url) {
|
|
581
|
+
const detection = {
|
|
582
|
+
detectedLanguage: null,
|
|
583
|
+
detectedCountry: null,
|
|
584
|
+
detectedScript: null,
|
|
585
|
+
isRTL: false,
|
|
586
|
+
confidence: 0,
|
|
587
|
+
evidence: [],
|
|
588
|
+
recommendations: [],
|
|
589
|
+
culturalIndicators: []
|
|
590
|
+
};
|
|
591
|
+
|
|
592
|
+
// Enhanced language detection
|
|
593
|
+
await this.performLanguageDetection(content, detection);
|
|
594
|
+
|
|
595
|
+
// Country detection from multiple sources
|
|
596
|
+
await this.performCountryDetection(content, url, detection);
|
|
597
|
+
|
|
598
|
+
// Script and direction detection
|
|
599
|
+
await this.performScriptDetection(content, detection);
|
|
600
|
+
|
|
601
|
+
// Cultural pattern detection
|
|
602
|
+
await this.performCulturalDetection(content, detection);
|
|
603
|
+
|
|
604
|
+
// Generate comprehensive recommendations
|
|
605
|
+
await this.generateLocalizationRecommendations(detection);
|
|
606
|
+
|
|
607
|
+
this.stats.languageDetections++;
|
|
608
|
+
this.emit('localizationDetected', detection);
|
|
609
|
+
|
|
610
|
+
return detection;
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
// Helper methods
|
|
614
|
+
|
|
615
|
+
async getLocalizationConfig(countryCode) {
|
|
616
|
+
const cacheKey = `config-${countryCode}`;
|
|
617
|
+
if (this.localeCache.has(cacheKey)) {
|
|
618
|
+
return this.localeCache.get(cacheKey);
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
return await this.configureCountry(countryCode);
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
buildAcceptLanguageHeader(language) {
|
|
625
|
+
const langCode = language.split('-')[0];
|
|
626
|
+
return LANGUAGE_MAPPINGS[langCode] || `${language},${langCode};q=0.9,en;q=0.8`;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
async loadTimezoneData() {
|
|
630
|
+
// Pre-populate common timezone offsets
|
|
631
|
+
const timezones = {
|
|
632
|
+
'America/New_York': -300, // EST offset in minutes
|
|
633
|
+
'Europe/London': 0,
|
|
634
|
+
'Europe/Berlin': 60,
|
|
635
|
+
'Asia/Tokyo': 540,
|
|
636
|
+
'Australia/Sydney': 600,
|
|
637
|
+
// Add more as needed
|
|
638
|
+
};
|
|
639
|
+
|
|
640
|
+
for (const [tz, offset] of Object.entries(timezones)) {
|
|
641
|
+
this.timezoneCache.set(tz, offset);
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
async loadGeoLocationData() {
|
|
646
|
+
// Pre-populate major city coordinates
|
|
647
|
+
const geoData = {
|
|
648
|
+
'US': { latitude: 40.7128, longitude: -74.0060 }, // New York
|
|
649
|
+
'GB': { latitude: 51.5074, longitude: -0.1278 }, // London
|
|
650
|
+
'DE': { latitude: 52.5200, longitude: 13.4050 }, // Berlin
|
|
651
|
+
'FR': { latitude: 48.8566, longitude: 2.3522 }, // Paris
|
|
652
|
+
'JP': { latitude: 35.6762, longitude: 139.6503 }, // Tokyo
|
|
653
|
+
'AU': { latitude: -33.8688, longitude: 151.2093 } // Sydney
|
|
654
|
+
};
|
|
655
|
+
|
|
656
|
+
for (const [country, coords] of Object.entries(geoData)) {
|
|
657
|
+
this.geoLocationCache.set(country, coords);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
/**
|
|
662
|
+
* Validate timezone string
|
|
663
|
+
*/
|
|
664
|
+
validateTimezone(timezone) {
|
|
665
|
+
if (!timezone || typeof timezone !== "string") {
|
|
666
|
+
throw new Error("Timezone must be a non-empty string");
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
// Check if timezone is in the supported countries
|
|
670
|
+
const validTimezones = Object.values(SUPPORTED_COUNTRIES).map(c => c.timezone);
|
|
671
|
+
const commonTimezones = [
|
|
672
|
+
"America/New_York", "America/Los_Angeles", "America/Chicago",
|
|
673
|
+
"Europe/London", "Europe/Berlin", "Europe/Paris", "Europe/Rome",
|
|
674
|
+
"Asia/Tokyo", "Asia/Shanghai", "Asia/Kolkata", "Asia/Seoul",
|
|
675
|
+
"Australia/Sydney", "America/Toronto", "America/Sao_Paulo",
|
|
676
|
+
"America/Mexico_City", "Europe/Moscow", "Europe/Madrid"
|
|
677
|
+
];
|
|
678
|
+
|
|
679
|
+
if (!validTimezones.includes(timezone) && !commonTimezones.includes(timezone)) {
|
|
680
|
+
throw new Error(`Unsupported timezone: ${timezone}`);
|
|
681
|
+
}
|
|
682
|
+
|
|
683
|
+
// Test if timezone is valid by trying to use it
|
|
684
|
+
try {
|
|
685
|
+
new Date().toLocaleString("en-US", { timeZone: timezone });
|
|
686
|
+
} catch (error) {
|
|
687
|
+
throw new Error(`Invalid timezone: ${timezone}`);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
return true;
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
getCountryCoordinates(countryCode) {
|
|
694
|
+
return this.geoLocationCache.get(countryCode) || null;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
getTimezoneOffset(timezone) {
|
|
698
|
+
if (this.timezoneCache.has(timezone)) {
|
|
699
|
+
return this.timezoneCache.get(timezone);
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
// Calculate dynamically if not cached
|
|
703
|
+
const now = new Date();
|
|
704
|
+
const utc = new Date(now.getTime() + (now.getTimezoneOffset() * 60000));
|
|
705
|
+
const targetTime = new Date(utc.toLocaleString('en-US', { timeZone: timezone }));
|
|
706
|
+
const offset = (targetTime.getTime() - utc.getTime()) / (1000 * 60);
|
|
707
|
+
|
|
708
|
+
this.timezoneCache.set(timezone, offset);
|
|
709
|
+
return offset;
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
async getDefaultGeoLocation(countryCode) {
|
|
713
|
+
return this.geoLocationCache.get(countryCode) || { latitude: 0, longitude: 0 };
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
generateUserAgent(countryCode) {
|
|
717
|
+
const userAgents = {
|
|
718
|
+
'US': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
|
719
|
+
'GB': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
|
720
|
+
'DE': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
721
|
+
};
|
|
722
|
+
|
|
723
|
+
return userAgents[countryCode] || userAgents['US'];
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
getDateFormat(countryCode) {
|
|
727
|
+
const formats = {
|
|
728
|
+
'US': 'MM/DD/YYYY',
|
|
729
|
+
'GB': 'DD/MM/YYYY',
|
|
730
|
+
'DE': 'DD.MM.YYYY',
|
|
731
|
+
'JP': 'YYYY/MM/DD'
|
|
732
|
+
};
|
|
733
|
+
|
|
734
|
+
return formats[countryCode] || 'MM/DD/YYYY';
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
getNumberFormat(countryCode) {
|
|
738
|
+
const formats = {
|
|
739
|
+
'US': '1,234.56',
|
|
740
|
+
'GB': '1,234.56',
|
|
741
|
+
'DE': '1.234,56',
|
|
742
|
+
'FR': '1 234,56'
|
|
743
|
+
};
|
|
744
|
+
|
|
745
|
+
return formats[countryCode] || '1,234.56';
|
|
746
|
+
}
|
|
747
|
+
|
|
748
|
+
getFirstDayOfWeek(countryCode) {
|
|
749
|
+
// 0 = Sunday, 1 = Monday
|
|
750
|
+
const firstDays = {
|
|
751
|
+
'US': 0,
|
|
752
|
+
'GB': 1,
|
|
753
|
+
'DE': 1,
|
|
754
|
+
'FR': 1
|
|
755
|
+
};
|
|
756
|
+
|
|
757
|
+
return firstDays[countryCode] || 1;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
generateScreenProperties(countryCode) {
|
|
761
|
+
// Different regions may have different common screen resolutions
|
|
762
|
+
const screenProps = {
|
|
763
|
+
'US': { width: 1920, height: 1080, colorDepth: 24 },
|
|
764
|
+
'JP': { width: 1366, height: 768, colorDepth: 24 },
|
|
765
|
+
'DE': { width: 1920, height: 1080, colorDepth: 24 }
|
|
766
|
+
};
|
|
767
|
+
|
|
768
|
+
return screenProps[countryCode] || screenProps['US'];
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
async generateNavigatorProperties(localizationConfig) {
|
|
772
|
+
return {
|
|
773
|
+
language: localizationConfig.language,
|
|
774
|
+
languages: [localizationConfig.language, localizationConfig.language.split('-')[0], 'en'],
|
|
775
|
+
platform: 'Win32',
|
|
776
|
+
userAgent: localizationConfig.userAgent || this.generateUserAgent(localizationConfig.countryCode),
|
|
777
|
+
cookieEnabled: true,
|
|
778
|
+
onLine: true,
|
|
779
|
+
hardwareConcurrency: 8
|
|
780
|
+
};
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
generateIntlProperties(language, countryCode) {
|
|
784
|
+
return {
|
|
785
|
+
locale: language,
|
|
786
|
+
timeZone: SUPPORTED_COUNTRIES[countryCode].timezone,
|
|
787
|
+
currency: SUPPORTED_COUNTRIES[countryCode].currency,
|
|
788
|
+
numberingSystem: 'latn',
|
|
789
|
+
calendar: 'gregory'
|
|
790
|
+
};
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
encodeLocationString(countryCode) {
|
|
794
|
+
// Google's UULE encoding for country-based searches
|
|
795
|
+
const countryNames = {
|
|
796
|
+
'US': 'United States',
|
|
797
|
+
'GB': 'United Kingdom',
|
|
798
|
+
'DE': 'Germany',
|
|
799
|
+
'FR': 'France'
|
|
800
|
+
};
|
|
801
|
+
|
|
802
|
+
const countryName = countryNames[countryCode] || countryCode;
|
|
803
|
+
return Buffer.from(countryName).toString('base64');
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
async getProxyIP(countryCode) {
|
|
807
|
+
// This would integrate with proxy services
|
|
808
|
+
// For now, return placeholder
|
|
809
|
+
return '192.0.2.1'; // RFC5737 documentation IP
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
getSuggestedCountries(url) {
|
|
813
|
+
// Analyze URL to suggest appropriate countries
|
|
814
|
+
const domain = new URL(url).hostname;
|
|
815
|
+
|
|
816
|
+
if (domain.includes('.co.uk')) return ['GB'];
|
|
817
|
+
if (domain.includes('.de')) return ['DE'];
|
|
818
|
+
if (domain.includes('.fr')) return ['FR'];
|
|
819
|
+
if (domain.includes('.jp')) return ['JP'];
|
|
820
|
+
|
|
821
|
+
return ['US', 'GB', 'DE']; // Common fallbacks
|
|
822
|
+
}
|
|
823
|
+
|
|
824
|
+
async getSuggestedUserAgents(url) {
|
|
825
|
+
return [
|
|
826
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
|
827
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
|
828
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
|
829
|
+
];
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
getProxyRegions(url) {
|
|
833
|
+
// Based on the URL, suggest proxy regions
|
|
834
|
+
return ['US-East', 'EU-West', 'Asia-Pacific'];
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
generateScreenOverrides(countryCode) {
|
|
838
|
+
const screen = this.generateScreenProperties(countryCode);
|
|
839
|
+
|
|
840
|
+
return `
|
|
841
|
+
Object.defineProperty(screen, 'width', { value: ${screen.width} });
|
|
842
|
+
Object.defineProperty(screen, 'height', { value: ${screen.height} });
|
|
843
|
+
Object.defineProperty(screen, 'colorDepth', { value: ${screen.colorDepth} });
|
|
844
|
+
`;
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
async generateFingerprint(countryCode) {
|
|
848
|
+
// Generate browser fingerprint consistent with the target country
|
|
849
|
+
return {
|
|
850
|
+
userAgent: this.generateUserAgent(countryCode),
|
|
851
|
+
screen: this.generateScreenProperties(countryCode),
|
|
852
|
+
timezone: SUPPORTED_COUNTRIES[countryCode].timezone,
|
|
853
|
+
language: SUPPORTED_COUNTRIES[countryCode].language,
|
|
854
|
+
platform: 'Win32'
|
|
855
|
+
};
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
// Public API methods
|
|
859
|
+
|
|
860
|
+
getSupportedCountries() {
|
|
861
|
+
return Object.keys(SUPPORTED_COUNTRIES);
|
|
862
|
+
}
|
|
863
|
+
|
|
864
|
+
getCurrentSettings() {
|
|
865
|
+
return { ...this.currentSettings };
|
|
866
|
+
}
|
|
867
|
+
|
|
868
|
+
getStats() {
|
|
869
|
+
return {
|
|
870
|
+
...this.stats,
|
|
871
|
+
cacheSize: this.localeCache.size,
|
|
872
|
+
supportedCountries: Object.keys(SUPPORTED_COUNTRIES).length,
|
|
873
|
+
activeProxies: this.proxyManager.activeProxies.size,
|
|
874
|
+
failedProxies: this.proxyManager.failedProxies.size,
|
|
875
|
+
translationProviders: this.translationProviders.size,
|
|
876
|
+
culturalPatterns: this.culturalPatterns.size,
|
|
877
|
+
lastUpdated: Date.now()
|
|
878
|
+
};
|
|
879
|
+
}
|
|
880
|
+
|
|
881
|
+
resetStats() {
|
|
882
|
+
this.stats = {
|
|
883
|
+
localizationApplied: 0,
|
|
884
|
+
geoBlocksBypass: 0,
|
|
885
|
+
timezoneChanges: 0,
|
|
886
|
+
languageDetections: 0,
|
|
887
|
+
proxyUsage: 0,
|
|
888
|
+
proxyRotations: 0,
|
|
889
|
+
dnsOverrides: 0,
|
|
890
|
+
translationRequests: 0,
|
|
891
|
+
culturalAdaptations: 0,
|
|
892
|
+
lastUpdated: Date.now()
|
|
893
|
+
};
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
|
|
897
|
+
/**
|
|
898
|
+
* Get country configuration
|
|
899
|
+
*/
|
|
900
|
+
getCountryConfig(countryCode) {
|
|
901
|
+
return SUPPORTED_COUNTRIES[countryCode] || null;
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
/**
|
|
905
|
+
* Generate Accept-Language header
|
|
906
|
+
*/
|
|
907
|
+
generateAcceptLanguageHeader(language) {
|
|
908
|
+
const langCode = language.split("-")[0];
|
|
909
|
+
return `${language},${langCode};q=0.9,en;q=0.8`;
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
/**
|
|
913
|
+
* Create browser locale configuration
|
|
914
|
+
*/
|
|
915
|
+
createBrowserLocale(countryCode) {
|
|
916
|
+
const config = this.getCountryConfig(countryCode);
|
|
917
|
+
if (!config) return null;
|
|
918
|
+
|
|
919
|
+
return {
|
|
920
|
+
languages: [config.language, config.language.split("-")[0], "en"],
|
|
921
|
+
timezone: config.timezone,
|
|
922
|
+
currency: config.currency,
|
|
923
|
+
dateFormat: this.getDateFormat(countryCode),
|
|
924
|
+
numberFormat: this.getNumberFormat(countryCode)
|
|
925
|
+
};
|
|
926
|
+
}
|
|
927
|
+
|
|
928
|
+
/**
|
|
929
|
+
* Generate localized headers
|
|
930
|
+
*/
|
|
931
|
+
generateLocalizedHeaders(countryCode) {
|
|
932
|
+
const config = this.getCountryConfig(countryCode);
|
|
933
|
+
if (!config) return {};
|
|
934
|
+
|
|
935
|
+
return {
|
|
936
|
+
"Accept-Language": this.generateAcceptLanguageHeader(config.language),
|
|
937
|
+
"Accept-Encoding": "gzip, deflate, br",
|
|
938
|
+
"Cache-Control": "no-cache",
|
|
939
|
+
"DNT": "1"
|
|
940
|
+
};
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
/**
|
|
944
|
+
* Detect content language
|
|
945
|
+
*/
|
|
946
|
+
detectContentLanguage(content) {
|
|
947
|
+
// Look for lang attribute in HTML
|
|
948
|
+
const langMatch = content.match(/<html[^>]+lang=["']([^"']+)["']/i);
|
|
949
|
+
if (langMatch) {
|
|
950
|
+
return langMatch[1];
|
|
951
|
+
}
|
|
952
|
+
|
|
953
|
+
// Look for meta tag
|
|
954
|
+
const metaMatch = content.match(/<meta[^>]+http-equiv=["']content-language["'][^>]+content=["']([^"']+)["']/i);
|
|
955
|
+
if (metaMatch) {
|
|
956
|
+
return metaMatch[1];
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
return null;
|
|
960
|
+
}
|
|
961
|
+
/**
|
|
962
|
+
* Initialize proxy system with regional configurations
|
|
963
|
+
*/
|
|
964
|
+
async initializeProxySystem() {
|
|
965
|
+
try {
|
|
966
|
+
// Load proxy configurations from environment or config
|
|
967
|
+
for (const [region, config] of Object.entries(PROXY_PROVIDERS.regions)) {
|
|
968
|
+
if (process.env[`PROXY_${region.toUpperCase().replace('-', '_')}_ENABLED`] === 'true') {
|
|
969
|
+
this.proxyManager.activeProxies.set(region, {
|
|
970
|
+
...config,
|
|
971
|
+
username: process.env[`PROXY_${region.toUpperCase().replace('-', '_')}_USERNAME`],
|
|
972
|
+
password: process.env[`PROXY_${region.toUpperCase().replace('-', '_')}_PASSWORD`],
|
|
973
|
+
healthScore: 100,
|
|
974
|
+
lastCheck: 0,
|
|
975
|
+
failureCount: 0
|
|
976
|
+
});
|
|
977
|
+
}
|
|
978
|
+
}
|
|
979
|
+
|
|
980
|
+
// Setup proxy health monitoring
|
|
981
|
+
if (this.proxyManager.activeProxies.size > 0) {
|
|
982
|
+
await this.performProxyHealthChecks();
|
|
983
|
+
}
|
|
984
|
+
|
|
985
|
+
} catch (error) {
|
|
986
|
+
console.warn('Failed to initialize proxy system:', error.message);
|
|
987
|
+
}
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
/**
|
|
991
|
+
* Initialize translation services
|
|
992
|
+
*/
|
|
993
|
+
async initializeTranslationServices() {
|
|
994
|
+
try {
|
|
995
|
+
// Google Translate
|
|
996
|
+
if (TRANSLATION_SERVICES.google.enabled) {
|
|
997
|
+
this.translationProviders.set('google', {
|
|
998
|
+
type: 'google',
|
|
999
|
+
apiKey: TRANSLATION_SERVICES.google.apiKey,
|
|
1000
|
+
endpoint: TRANSLATION_SERVICES.google.endpoint,
|
|
1001
|
+
available: true
|
|
1002
|
+
});
|
|
1003
|
+
}
|
|
1004
|
+
|
|
1005
|
+
// Azure Translator
|
|
1006
|
+
if (TRANSLATION_SERVICES.azure.enabled) {
|
|
1007
|
+
this.translationProviders.set('azure', {
|
|
1008
|
+
type: 'azure',
|
|
1009
|
+
key: TRANSLATION_SERVICES.azure.key,
|
|
1010
|
+
region: TRANSLATION_SERVICES.azure.region,
|
|
1011
|
+
endpoint: TRANSLATION_SERVICES.azure.endpoint,
|
|
1012
|
+
available: true
|
|
1013
|
+
});
|
|
1014
|
+
}
|
|
1015
|
+
|
|
1016
|
+
// LibreTranslate
|
|
1017
|
+
if (TRANSLATION_SERVICES.libre.enabled) {
|
|
1018
|
+
this.translationProviders.set('libre', {
|
|
1019
|
+
type: 'libre',
|
|
1020
|
+
url: TRANSLATION_SERVICES.libre.url,
|
|
1021
|
+
apiKey: TRANSLATION_SERVICES.libre.apiKey,
|
|
1022
|
+
available: true
|
|
1023
|
+
});
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
} catch (error) {
|
|
1027
|
+
console.warn('Failed to initialize translation services:', error.message);
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
/**
|
|
1032
|
+
* Load cultural browsing patterns for different regions
|
|
1033
|
+
*/
|
|
1034
|
+
async loadCulturalPatterns() {
|
|
1035
|
+
const patterns = {
|
|
1036
|
+
'US': {
|
|
1037
|
+
scrollSpeed: 'fast',
|
|
1038
|
+
clickDelay: 200,
|
|
1039
|
+
readingSpeed: 250, // words per minute
|
|
1040
|
+
pageStayTime: { min: 5000, max: 30000 },
|
|
1041
|
+
bounceRate: 0.4
|
|
1042
|
+
},
|
|
1043
|
+
'JP': {
|
|
1044
|
+
scrollSpeed: 'medium',
|
|
1045
|
+
clickDelay: 500,
|
|
1046
|
+
readingSpeed: 400,
|
|
1047
|
+
pageStayTime: { min: 8000, max: 45000 },
|
|
1048
|
+
bounceRate: 0.25
|
|
1049
|
+
},
|
|
1050
|
+
'DE': {
|
|
1051
|
+
scrollSpeed: 'medium',
|
|
1052
|
+
clickDelay: 300,
|
|
1053
|
+
readingSpeed: 200,
|
|
1054
|
+
pageStayTime: { min: 10000, max: 60000 },
|
|
1055
|
+
bounceRate: 0.3
|
|
1056
|
+
},
|
|
1057
|
+
'CN': {
|
|
1058
|
+
scrollSpeed: 'slow',
|
|
1059
|
+
clickDelay: 400,
|
|
1060
|
+
readingSpeed: 300,
|
|
1061
|
+
pageStayTime: { min: 12000, max: 50000 },
|
|
1062
|
+
bounceRate: 0.35
|
|
1063
|
+
},
|
|
1064
|
+
'SA': {
|
|
1065
|
+
scrollSpeed: 'slow',
|
|
1066
|
+
clickDelay: 600,
|
|
1067
|
+
readingSpeed: 180,
|
|
1068
|
+
pageStayTime: { min: 15000, max: 70000 },
|
|
1069
|
+
bounceRate: 0.2,
|
|
1070
|
+
rtlBehavior: true
|
|
1071
|
+
}
|
|
1072
|
+
};
|
|
1073
|
+
|
|
1074
|
+
for (const [country, pattern] of Object.entries(patterns)) {
|
|
1075
|
+
this.culturalPatterns.set(country, pattern);
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
1078
|
+
|
|
1079
|
+
/**
|
|
1080
|
+
* Setup periodic health checks for proxies and services
|
|
1081
|
+
*/
|
|
1082
|
+
setupHealthChecks() {
|
|
1083
|
+
// Proxy health checks every 5 minutes
|
|
1084
|
+
setInterval(async () => {
|
|
1085
|
+
if (this.proxyManager.activeProxies.size > 0) {
|
|
1086
|
+
await this.performProxyHealthChecks();
|
|
1087
|
+
}
|
|
1088
|
+
}, 300000);
|
|
1089
|
+
|
|
1090
|
+
// Translation service health checks every 10 minutes
|
|
1091
|
+
setInterval(async () => {
|
|
1092
|
+
if (this.translationProviders.size > 0) {
|
|
1093
|
+
await this.checkTranslationServiceHealth();
|
|
1094
|
+
}
|
|
1095
|
+
}, 600000);
|
|
1096
|
+
}
|
|
1097
|
+
|
|
1098
|
+
/**
|
|
1099
|
+
* Perform health checks on all active proxies
|
|
1100
|
+
*/
|
|
1101
|
+
async performProxyHealthChecks() {
|
|
1102
|
+
const healthCheckPromises = [];
|
|
1103
|
+
|
|
1104
|
+
for (const [region, proxy] of this.proxyManager.activeProxies) {
|
|
1105
|
+
healthCheckPromises.push(this.checkProxyHealth(region, proxy));
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
await Promise.allSettled(healthCheckPromises);
|
|
1109
|
+
}
|
|
1110
|
+
|
|
1111
|
+
/**
|
|
1112
|
+
* Check health of a specific proxy
|
|
1113
|
+
*/
|
|
1114
|
+
async checkProxyHealth(region, proxy) {
|
|
1115
|
+
try {
|
|
1116
|
+
const start = Date.now();
|
|
1117
|
+
const response = await fetch('http://httpbin.org/ip', {
|
|
1118
|
+
method: 'GET',
|
|
1119
|
+
headers: { 'User-Agent': 'Health-Check/1.0' },
|
|
1120
|
+
// Proxy configuration would go here
|
|
1121
|
+
timeout: 10000
|
|
1122
|
+
});
|
|
1123
|
+
|
|
1124
|
+
const latency = Date.now() - start;
|
|
1125
|
+
|
|
1126
|
+
if (response.ok) {
|
|
1127
|
+
proxy.healthScore = Math.min(100, proxy.healthScore + 10);
|
|
1128
|
+
proxy.failureCount = 0;
|
|
1129
|
+
proxy.latency = latency;
|
|
1130
|
+
} else {
|
|
1131
|
+
proxy.healthScore = Math.max(0, proxy.healthScore - 20);
|
|
1132
|
+
proxy.failureCount++;
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
proxy.lastCheck = Date.now();
|
|
1136
|
+
|
|
1137
|
+
} catch (error) {
|
|
1138
|
+
proxy.healthScore = Math.max(0, proxy.healthScore - 30);
|
|
1139
|
+
proxy.failureCount++;
|
|
1140
|
+
proxy.lastCheck = Date.now();
|
|
1141
|
+
|
|
1142
|
+
if (proxy.failureCount > 3) {
|
|
1143
|
+
this.proxyManager.failedProxies.add(region);
|
|
1144
|
+
}
|
|
1145
|
+
}
|
|
1146
|
+
}
|
|
1147
|
+
|
|
1148
|
+
/**
|
|
1149
|
+
* Get optimal proxy for a target country
|
|
1150
|
+
*/
|
|
1151
|
+
async getOptimalProxy(countryCode, proxySettings) {
|
|
1152
|
+
if (!proxySettings?.enabled || this.proxyManager.activeProxies.size === 0) {
|
|
1153
|
+
return null;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
const countryConfig = SUPPORTED_COUNTRIES[countryCode];
|
|
1157
|
+
const preferredRegion = proxySettings.region || countryConfig?.proxyRegion;
|
|
1158
|
+
|
|
1159
|
+
// Try to get proxy from preferred region first
|
|
1160
|
+
if (preferredRegion && this.proxyManager.activeProxies.has(preferredRegion)) {
|
|
1161
|
+
const proxy = this.proxyManager.activeProxies.get(preferredRegion);
|
|
1162
|
+
if (proxy.healthScore > 50 && !this.proxyManager.failedProxies.has(preferredRegion)) {
|
|
1163
|
+
this.stats.proxyUsage++;
|
|
1164
|
+
return this.formatProxyConfig(proxy, proxySettings);
|
|
1165
|
+
}
|
|
1166
|
+
}
|
|
1167
|
+
|
|
1168
|
+
// Fallback to best available proxy
|
|
1169
|
+
const availableProxies = Array.from(this.proxyManager.activeProxies.entries())
|
|
1170
|
+
.filter(([region, proxy]) =>
|
|
1171
|
+
proxy.healthScore > 30 &&
|
|
1172
|
+
!this.proxyManager.failedProxies.has(region)
|
|
1173
|
+
)
|
|
1174
|
+
.sort(([,a], [,b]) => b.healthScore - a.healthScore);
|
|
1175
|
+
|
|
1176
|
+
if (availableProxies.length > 0) {
|
|
1177
|
+
const [region, proxy] = availableProxies[0];
|
|
1178
|
+
this.stats.proxyUsage++;
|
|
1179
|
+
return this.formatProxyConfig(proxy, proxySettings);
|
|
1180
|
+
}
|
|
1181
|
+
|
|
1182
|
+
return null;
|
|
1183
|
+
}
|
|
1184
|
+
|
|
1185
|
+
/**
|
|
1186
|
+
* Format proxy configuration for browser use
|
|
1187
|
+
*/
|
|
1188
|
+
formatProxyConfig(proxy, settings) {
|
|
1189
|
+
return {
|
|
1190
|
+
server: `${settings.type || 'https'}://${proxy.endpoint}:${proxy.port}`,
|
|
1191
|
+
username: proxy.username,
|
|
1192
|
+
password: proxy.password,
|
|
1193
|
+
bypass: settings.bypass || 'localhost,127.0.0.1'
|
|
1194
|
+
};
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
/**
|
|
1198
|
+
* Get cultural browsing behavior for a country
|
|
1199
|
+
*/
|
|
1200
|
+
getCulturalBehavior(countryCode) {
|
|
1201
|
+
return this.culturalPatterns.get(countryCode) || this.culturalPatterns.get('US');
|
|
1202
|
+
}
|
|
1203
|
+
|
|
1204
|
+
/**
|
|
1205
|
+
* Get measurement system for a country
|
|
1206
|
+
*/
|
|
1207
|
+
getMeasurementSystem(countryCode) {
|
|
1208
|
+
const imperialCountries = ['US', 'GB', 'LR', 'MM'];
|
|
1209
|
+
return imperialCountries.includes(countryCode) ? 'imperial' : 'metric';
|
|
1210
|
+
}
|
|
1211
|
+
|
|
1212
|
+
/**
|
|
1213
|
+
* Get time format preference for a country
|
|
1214
|
+
*/
|
|
1215
|
+
getTimeFormat(countryCode) {
|
|
1216
|
+
const twelveHourCountries = ['US', 'CA', 'AU', 'NZ', 'PH', 'EG', 'SA'];
|
|
1217
|
+
return twelveHourCountries.includes(countryCode) ? '12h' : '24h';
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
/**
|
|
1221
|
+
* Get currency display preference for a country
|
|
1222
|
+
*/
|
|
1223
|
+
getCurrencyDisplay(countryCode) {
|
|
1224
|
+
const symbolCountries = ['US', 'GB', 'JP', 'CN', 'IN', 'KR'];
|
|
1225
|
+
return symbolCountries.includes(countryCode) ? 'symbol' : 'code';
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
/**
|
|
1229
|
+
* Generate bypass strategies for geo-blocking
|
|
1230
|
+
*/
|
|
1231
|
+
async generateBypassStrategies(url, response) {
|
|
1232
|
+
const strategies = [];
|
|
1233
|
+
|
|
1234
|
+
// Country-based bypass
|
|
1235
|
+
strategies.push({
|
|
1236
|
+
type: 'country_change',
|
|
1237
|
+
priority: 1,
|
|
1238
|
+
description: 'Access from different geographic location',
|
|
1239
|
+
suggestedCountries: this.getOptimalCountriesForUrl(url),
|
|
1240
|
+
estimatedSuccess: 0.8
|
|
1241
|
+
});
|
|
1242
|
+
|
|
1243
|
+
// Proxy-based bypass
|
|
1244
|
+
if (this.proxyManager.activeProxies.size > 0) {
|
|
1245
|
+
strategies.push({
|
|
1246
|
+
type: 'proxy_rotation',
|
|
1247
|
+
priority: 2,
|
|
1248
|
+
description: 'Use proxy servers from different regions',
|
|
1249
|
+
availableRegions: Array.from(this.proxyManager.activeProxies.keys()),
|
|
1250
|
+
estimatedSuccess: 0.7
|
|
1251
|
+
});
|
|
1252
|
+
}
|
|
1253
|
+
|
|
1254
|
+
// User agent rotation
|
|
1255
|
+
strategies.push({
|
|
1256
|
+
type: 'user_agent_rotation',
|
|
1257
|
+
priority: 3,
|
|
1258
|
+
description: 'Rotate user agent strings',
|
|
1259
|
+
suggestedAgents: await this.getOptimalUserAgents(url),
|
|
1260
|
+
estimatedSuccess: 0.4
|
|
1261
|
+
});
|
|
1262
|
+
|
|
1263
|
+
// Header manipulation
|
|
1264
|
+
strategies.push({
|
|
1265
|
+
type: 'header_manipulation',
|
|
1266
|
+
priority: 4,
|
|
1267
|
+
description: 'Modify HTTP headers to bypass detection',
|
|
1268
|
+
modifications: this.getHeaderModifications(url),
|
|
1269
|
+
estimatedSuccess: 0.3
|
|
1270
|
+
});
|
|
1271
|
+
|
|
1272
|
+
return strategies.sort((a, b) => a.priority - b.priority);
|
|
1273
|
+
}
|
|
1274
|
+
|
|
1275
|
+
/**
|
|
1276
|
+
* Get optimal countries for accessing a URL
|
|
1277
|
+
*/
|
|
1278
|
+
getOptimalCountriesForUrl(url) {
|
|
1279
|
+
const domain = new URL(url).hostname;
|
|
1280
|
+
|
|
1281
|
+
// Domain-specific suggestions
|
|
1282
|
+
const domainMappings = {
|
|
1283
|
+
'bbc.co.uk': ['GB', 'IE', 'AU'],
|
|
1284
|
+
'cnn.com': ['US', 'CA'],
|
|
1285
|
+
'lemonde.fr': ['FR', 'BE', 'CH'],
|
|
1286
|
+
'spiegel.de': ['DE', 'AT', 'CH'],
|
|
1287
|
+
'nhk.or.jp': ['JP'],
|
|
1288
|
+
'globo.com': ['BR'],
|
|
1289
|
+
'rt.com': ['RU']
|
|
1290
|
+
};
|
|
1291
|
+
|
|
1292
|
+
if (domainMappings[domain]) {
|
|
1293
|
+
return domainMappings[domain];
|
|
1294
|
+
}
|
|
1295
|
+
|
|
1296
|
+
// TLD-based suggestions
|
|
1297
|
+
const tldMatch = domain.match(/\.([a-z]{2})$/);
|
|
1298
|
+
if (tldMatch) {
|
|
1299
|
+
const tld = tldMatch[1].toUpperCase();
|
|
1300
|
+
if (SUPPORTED_COUNTRIES[tld]) {
|
|
1301
|
+
return [tld];
|
|
1302
|
+
}
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
// Default fallbacks
|
|
1306
|
+
return ['US', 'GB', 'DE', 'CA', 'AU'];
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
/**
|
|
1310
|
+
* Get optimal user agents for a URL
|
|
1311
|
+
*/
|
|
1312
|
+
async getOptimalUserAgents(url) {
|
|
1313
|
+
return [
|
|
1314
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
1315
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
1316
|
+
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
1317
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0',
|
|
1318
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/121.0'
|
|
1319
|
+
];
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
/**
|
|
1323
|
+
* Get header modifications for bypass
|
|
1324
|
+
*/
|
|
1325
|
+
getHeaderModifications(url) {
|
|
1326
|
+
return {
|
|
1327
|
+
'X-Forwarded-For': 'Remove or randomize',
|
|
1328
|
+
'X-Real-IP': 'Remove',
|
|
1329
|
+
'CF-Connecting-IP': 'Remove',
|
|
1330
|
+
'X-Originating-IP': 'Remove',
|
|
1331
|
+
'Referer': 'Randomize or remove',
|
|
1332
|
+
'Origin': 'Match target domain'
|
|
1333
|
+
};
|
|
1334
|
+
}
|
|
1335
|
+
|
|
1336
|
+
/**
|
|
1337
|
+
* Enhanced language detection with multiple methods
|
|
1338
|
+
*/
|
|
1339
|
+
async performLanguageDetection(content, detection) {
|
|
1340
|
+
// HTML lang attribute
|
|
1341
|
+
const langMatch = content.match(/<html[^>]+lang=["']([^"']+)["']/i);
|
|
1342
|
+
if (langMatch) {
|
|
1343
|
+
detection.detectedLanguage = langMatch[1];
|
|
1344
|
+
detection.evidence.push(`HTML lang attribute: ${langMatch[1]}`);
|
|
1345
|
+
detection.confidence += 0.3;
|
|
1346
|
+
}
|
|
1347
|
+
|
|
1348
|
+
// Meta content-language
|
|
1349
|
+
const metaLangMatch = content.match(/<meta[^>]+http-equiv=["']content-language["'][^>]+content=["']([^"']+)["']/i);
|
|
1350
|
+
if (metaLangMatch) {
|
|
1351
|
+
detection.detectedLanguage = metaLangMatch[1];
|
|
1352
|
+
detection.evidence.push(`Meta content-language: ${metaLangMatch[1]}`);
|
|
1353
|
+
detection.confidence += 0.25;
|
|
1354
|
+
}
|
|
1355
|
+
|
|
1356
|
+
// Text analysis for language detection
|
|
1357
|
+
const textSample = this.extractTextSample(content);
|
|
1358
|
+
if (textSample) {
|
|
1359
|
+
const detectedLang = await this.analyzeTextLanguage(textSample);
|
|
1360
|
+
if (detectedLang) {
|
|
1361
|
+
detection.evidence.push(`Text analysis: ${detectedLang.language} (${detectedLang.confidence}%)`);
|
|
1362
|
+
detection.confidence += detectedLang.confidence / 100 * 0.2;
|
|
1363
|
+
}
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
/**
|
|
1368
|
+
* Script and text direction detection
|
|
1369
|
+
*/
|
|
1370
|
+
async performScriptDetection(content, detection) {
|
|
1371
|
+
// RTL script detection
|
|
1372
|
+
const rtlPatterns = {
|
|
1373
|
+
arabic: /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/,
|
|
1374
|
+
hebrew: /[\u0590-\u05FF]/,
|
|
1375
|
+
persian: /[\u06A0-\u06EF]/
|
|
1376
|
+
};
|
|
1377
|
+
|
|
1378
|
+
for (const [script, pattern] of Object.entries(rtlPatterns)) {
|
|
1379
|
+
if (pattern.test(content)) {
|
|
1380
|
+
detection.detectedScript = script;
|
|
1381
|
+
detection.isRTL = true;
|
|
1382
|
+
detection.evidence.push(`RTL script detected: ${script}`);
|
|
1383
|
+
detection.confidence += 0.15;
|
|
1384
|
+
break;
|
|
1385
|
+
}
|
|
1386
|
+
}
|
|
1387
|
+
|
|
1388
|
+
// CJK script detection
|
|
1389
|
+
const cjkPatterns = {
|
|
1390
|
+
chinese: /[\u4E00-\u9FFF]/,
|
|
1391
|
+
japanese: /[\u3040-\u309F\u30A0-\u30FF]/,
|
|
1392
|
+
korean: /[\uAC00-\uD7AF]/
|
|
1393
|
+
};
|
|
1394
|
+
|
|
1395
|
+
for (const [script, pattern] of Object.entries(cjkPatterns)) {
|
|
1396
|
+
if (pattern.test(content)) {
|
|
1397
|
+
detection.detectedScript = script;
|
|
1398
|
+
detection.evidence.push(`CJK script detected: ${script}`);
|
|
1399
|
+
detection.confidence += 0.1;
|
|
1400
|
+
break;
|
|
1401
|
+
}
|
|
1402
|
+
}
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1405
|
+
/**
|
|
1406
|
+
* Extract text sample for language analysis
|
|
1407
|
+
*/
|
|
1408
|
+
extractTextSample(content) {
|
|
1409
|
+
// Remove HTML tags and extract meaningful text
|
|
1410
|
+
const textContent = content.replace(/<[^>]*>/g, ' ')
|
|
1411
|
+
.replace(/\s+/g, ' ')
|
|
1412
|
+
.trim();
|
|
1413
|
+
|
|
1414
|
+
// Return first 500 characters for analysis
|
|
1415
|
+
return textContent.substring(0, 500);
|
|
1416
|
+
}
|
|
1417
|
+
|
|
1418
|
+
/**
|
|
1419
|
+
* Analyze text for language detection
|
|
1420
|
+
*/
|
|
1421
|
+
async analyzeTextLanguage(text) {
|
|
1422
|
+
// Simple heuristic-based language detection
|
|
1423
|
+
// In a real implementation, this could use a proper language detection library
|
|
1424
|
+
const patterns = {
|
|
1425
|
+
'en': /\b(the|and|is|in|to|of|a|that|it|with|for|as|was|on|are|you)\b/gi,
|
|
1426
|
+
'es': /\b(el|la|de|que|y|en|un|es|se|no|te|lo|le|da|su|por|son)\b/gi,
|
|
1427
|
+
'fr': /\b(le|de|et|à|un|il|être|et|en|avoir|que|pour|dans|ce|son|une)\b/gi,
|
|
1428
|
+
'de': /\b(der|die|und|in|den|von|zu|das|mit|sich|des|auf|für|ist|im)\b/gi,
|
|
1429
|
+
'it': /\b(il|di|che|e|la|per|in|un|è|da|sono|con|non|si|una|su)\b/gi
|
|
1430
|
+
};
|
|
1431
|
+
|
|
1432
|
+
let bestMatch = null;
|
|
1433
|
+
let maxMatches = 0;
|
|
1434
|
+
|
|
1435
|
+
for (const [lang, pattern] of Object.entries(patterns)) {
|
|
1436
|
+
const matches = (text.match(pattern) || []).length;
|
|
1437
|
+
if (matches > maxMatches) {
|
|
1438
|
+
maxMatches = matches;
|
|
1439
|
+
bestMatch = { language: lang, confidence: Math.min(95, matches * 5) };
|
|
1440
|
+
}
|
|
1441
|
+
}
|
|
1442
|
+
|
|
1443
|
+
return bestMatch;
|
|
1444
|
+
}
|
|
1445
|
+
|
|
1446
|
+
/**
|
|
1447
|
+
* Enhanced country detection
|
|
1448
|
+
*/
|
|
1449
|
+
async performCountryDetection(content, url, detection) {
|
|
1450
|
+
// TLD analysis
|
|
1451
|
+
const urlObj = new URL(url);
|
|
1452
|
+
const tldMatch = urlObj.hostname.match(/\.([a-z]{2})$/);
|
|
1453
|
+
if (tldMatch) {
|
|
1454
|
+
const tld = tldMatch[1].toUpperCase();
|
|
1455
|
+
if (SUPPORTED_COUNTRIES[tld]) {
|
|
1456
|
+
detection.detectedCountry = tld;
|
|
1457
|
+
detection.evidence.push(`TLD suggests country: ${tld}`);
|
|
1458
|
+
detection.confidence += 0.2;
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
// Enhanced currency detection
|
|
1463
|
+
const currencyPatterns = {
|
|
1464
|
+
'$': { countries: ['US', 'CA', 'AU', 'NZ', 'SG'], symbols: ['$', 'USD', 'CAD', 'AUD'] },
|
|
1465
|
+
'€': { countries: ['DE', 'FR', 'IT', 'ES', 'NL'], symbols: ['€', 'EUR'] },
|
|
1466
|
+
'£': { countries: ['GB'], symbols: ['£', 'GBP'] },
|
|
1467
|
+
'¥': { countries: ['JP', 'CN'], symbols: ['¥', 'JPY', 'CNY', '¥'] },
|
|
1468
|
+
'₹': { countries: ['IN'], symbols: ['₹', 'INR'] },
|
|
1469
|
+
'₽': { countries: ['RU'], symbols: ['₽', 'RUB'] },
|
|
1470
|
+
'₩': { countries: ['KR'], symbols: ['₩', 'KRW'] },
|
|
1471
|
+
'﷼': { countries: ['SA'], symbols: ['﷼', 'SAR'] }
|
|
1472
|
+
};
|
|
1473
|
+
|
|
1474
|
+
for (const [symbol, data] of Object.entries(currencyPatterns)) {
|
|
1475
|
+
const found = data.symbols.some(s => content.includes(s));
|
|
1476
|
+
if (found) {
|
|
1477
|
+
detection.evidence.push(`Currency symbol found: ${symbol}`);
|
|
1478
|
+
detection.confidence += 0.1;
|
|
1479
|
+
|
|
1480
|
+
if (!detection.detectedCountry && data.countries.length === 1) {
|
|
1481
|
+
detection.detectedCountry = data.countries[0];
|
|
1482
|
+
}
|
|
1483
|
+
}
|
|
1484
|
+
}
|
|
1485
|
+
|
|
1486
|
+
// Phone number pattern analysis
|
|
1487
|
+
const phonePatterns = {
|
|
1488
|
+
'US': /\+1[\s.-]?\(?\\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/,
|
|
1489
|
+
'GB': /\+44[\s.-]?\d{2,4}[\s.-]?\d{6,8}/,
|
|
1490
|
+
'DE': /\+49[\s.-]?\d{2,4}[\s.-]?\d{6,8}/,
|
|
1491
|
+
'FR': /\+33[\s.-]?\d{1}[\s.-]?\d{8}/
|
|
1492
|
+
};
|
|
1493
|
+
|
|
1494
|
+
for (const [country, pattern] of Object.entries(phonePatterns)) {
|
|
1495
|
+
if (pattern.test(content)) {
|
|
1496
|
+
detection.evidence.push(`Phone pattern suggests country: ${country}`);
|
|
1497
|
+
detection.confidence += 0.1;
|
|
1498
|
+
if (!detection.detectedCountry) {
|
|
1499
|
+
detection.detectedCountry = country;
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
}
|
|
1503
|
+
}
|
|
1504
|
+
|
|
1505
|
+
/**
|
|
1506
|
+
* Cultural pattern detection
|
|
1507
|
+
*/
|
|
1508
|
+
async performCulturalDetection(content, detection) {
|
|
1509
|
+
// Date format detection
|
|
1510
|
+
const datePatterns = {
|
|
1511
|
+
'US': /\d{1,2}\/\d{1,2}\/\d{4}/,
|
|
1512
|
+
'GB': /\d{1,2}\/\d{1,2}\/\d{4}|\d{1,2}-\d{1,2}-\d{4}/,
|
|
1513
|
+
'DE': /\d{1,2}\.\d{1,2}\.\d{4}/,
|
|
1514
|
+
'JP': /\d{4}\/\d{1,2}\/\d{1,2}/
|
|
1515
|
+
};
|
|
1516
|
+
|
|
1517
|
+
for (const [country, pattern] of Object.entries(datePatterns)) {
|
|
1518
|
+
if (pattern.test(content)) {
|
|
1519
|
+
detection.culturalIndicators.push(`Date format suggests: ${country}`);
|
|
1520
|
+
}
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1523
|
+
// Measurement system detection
|
|
1524
|
+
const metricIndicators = /\d+\s*(cm|mm|km|kg|celsius|°C)/i;
|
|
1525
|
+
const imperialIndicators = /\d+\s*(inch|foot|feet|yard|mile|pound|fahrenheit|°F)/i;
|
|
1526
|
+
|
|
1527
|
+
if (metricIndicators.test(content)) {
|
|
1528
|
+
detection.culturalIndicators.push('Metric measurement system');
|
|
1529
|
+
}
|
|
1530
|
+
if (imperialIndicators.test(content)) {
|
|
1531
|
+
detection.culturalIndicators.push('Imperial measurement system');
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
|
|
1535
|
+
/**
|
|
1536
|
+
* Generate comprehensive localization recommendations
|
|
1537
|
+
*/
|
|
1538
|
+
async generateLocalizationRecommendations(detection) {
|
|
1539
|
+
if (detection.detectedCountry) {
|
|
1540
|
+
detection.recommendations.push({
|
|
1541
|
+
type: 'country_localization',
|
|
1542
|
+
countryCode: detection.detectedCountry,
|
|
1543
|
+
confidence: detection.confidence,
|
|
1544
|
+
reason: 'Detected from page content analysis'
|
|
1545
|
+
});
|
|
1546
|
+
}
|
|
1547
|
+
|
|
1548
|
+
if (detection.detectedLanguage) {
|
|
1549
|
+
detection.recommendations.push({
|
|
1550
|
+
type: 'language_localization',
|
|
1551
|
+
language: detection.detectedLanguage,
|
|
1552
|
+
confidence: detection.confidence,
|
|
1553
|
+
reason: 'Detected from HTML attributes and content'
|
|
1554
|
+
});
|
|
1555
|
+
}
|
|
1556
|
+
|
|
1557
|
+
if (detection.isRTL) {
|
|
1558
|
+
detection.recommendations.push({
|
|
1559
|
+
type: 'rtl_support',
|
|
1560
|
+
enabled: true,
|
|
1561
|
+
reason: 'RTL script detected in content'
|
|
1562
|
+
});
|
|
1563
|
+
}
|
|
1564
|
+
|
|
1565
|
+
// Translation recommendations
|
|
1566
|
+
if (this.translationProviders.size > 0 && detection.detectedLanguage) {
|
|
1567
|
+
detection.recommendations.push({
|
|
1568
|
+
type: 'translation_available',
|
|
1569
|
+
sourceLanguage: detection.detectedLanguage,
|
|
1570
|
+
providers: Array.from(this.translationProviders.keys()),
|
|
1571
|
+
reason: 'Translation services available'
|
|
1572
|
+
});
|
|
1573
|
+
}
|
|
1574
|
+
}
|
|
1575
|
+
|
|
1576
|
+
clearCache() {
|
|
1577
|
+
this.localeCache.clear();
|
|
1578
|
+
this.geoLocationCache.clear();
|
|
1579
|
+
this.timezoneCache.clear();
|
|
1580
|
+
this.proxyCache.clear();
|
|
1581
|
+
this.translationCache.clear();
|
|
1582
|
+
this.emit('cacheCleared');
|
|
1583
|
+
}
|
|
1584
|
+
|
|
1585
|
+
/**
|
|
1586
|
+
* Cleanup method for proper resource disposal
|
|
1587
|
+
*/
|
|
1588
|
+
async cleanup() {
|
|
1589
|
+
try {
|
|
1590
|
+
this.clearCache();
|
|
1591
|
+
this.removeAllListeners();
|
|
1592
|
+
this.resetStats();
|
|
1593
|
+
|
|
1594
|
+
// Clear all health check intervals
|
|
1595
|
+
if (this.healthCheckInterval) {
|
|
1596
|
+
clearInterval(this.healthCheckInterval);
|
|
1597
|
+
}
|
|
1598
|
+
|
|
1599
|
+
// Reset proxy manager
|
|
1600
|
+
this.proxyManager.activeProxies.clear();
|
|
1601
|
+
this.proxyManager.failedProxies.clear();
|
|
1602
|
+
|
|
1603
|
+
// Clear translation providers
|
|
1604
|
+
this.translationProviders.clear();
|
|
1605
|
+
|
|
1606
|
+
} catch (error) {
|
|
1607
|
+
console.warn("Warning during LocalizationManager cleanup:", error.message);
|
|
1608
|
+
}
|
|
1609
|
+
}
|
|
1610
|
+
}
|
|
1611
|
+
|
|
1612
|
+
export default LocalizationManager;
|
|
1613
|
+
|
|
1614
|
+
// Export constants for external use
|
|
1615
|
+
export { SUPPORTED_COUNTRIES, RTL_LANGUAGES, PROXY_PROVIDERS, TRANSLATION_SERVICES };
|