fraudcrawler 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fraudcrawler might be problematic. Click here for more details.

fraudcrawler/base/base.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import json
2
2
  import logging
3
+ import base64
3
4
  from pydantic import (
4
5
  BaseModel,
5
6
  Field,
@@ -9,7 +10,7 @@ from pydantic import (
9
10
  from pydantic_settings import BaseSettings
10
11
  from urllib.parse import urlparse
11
12
  import re
12
- from typing import Any, Dict, List, TYPE_CHECKING
13
+ from typing import Any, Dict, List
13
14
 
14
15
  import httpx
15
16
 
@@ -23,9 +24,6 @@ from fraudcrawler.settings import (
23
24
  DEFAULT_HTTPX_REDIRECTS,
24
25
  )
25
26
 
26
- if TYPE_CHECKING:
27
- from fraudcrawler.scraping.zyte import ZyteAPI
28
-
29
27
  logger = logging.getLogger(__name__)
30
28
 
31
29
  # Load google locations and languages
@@ -245,7 +243,7 @@ class DomainUtils:
245
243
  hostname = hostname[4:]
246
244
  return hostname.lower()
247
245
 
248
- async def _unblock_url(self, url: str, zyte_api: "ZyteAPI") -> bytes | None:
246
+ async def _unblock_url(self, url: str, zyte_api: Any) -> bytes | None:
249
247
  """Attempts to unblock a URL using Zyte proxy mode when direct access fails.
250
248
 
251
249
  This method is specifically designed to handle 403 Forbidden errors for domains
@@ -263,9 +261,6 @@ class DomainUtils:
263
261
  details = await zyte_api.details(url)
264
262
 
265
263
  if details and "httpResponseBody" in details:
266
- # Decode the base64 content
267
- import base64
268
-
269
264
  html_content = base64.b64decode(details["httpResponseBody"])
270
265
  logger.info(f"Successfully unblocked URL using Zyte proxy: {url}")
271
266
  return html_content
@@ -97,4 +97,4 @@ def search(search_term: str):
97
97
 
98
98
 
99
99
  if __name__ == "__main__":
100
- search(search_term='Liebherr "TP1410"')
100
+ search(search_term="electric cigarettes")
@@ -131,6 +131,17 @@ class SerpAPI(SearchEngine):
131
131
  search_string += " site:" + " OR site:".join(s for s in sites)
132
132
  return search_string
133
133
 
134
+ @staticmethod
135
+ def _get_google_domain(location: Location) -> str:
136
+ """Gets the Google domain for the given location if they do not use the default pattern google.tld"""
137
+ if location.name == "Brazil":
138
+ return "google.com.br"
139
+ elif location.name == "United Kingdom":
140
+ return "google.co.uk"
141
+ elif location.name == "Argentina":
142
+ return "google.com.ar"
143
+ return f"google.{location.code}"
144
+
134
145
  async def _search(
135
146
  self,
136
147
  search_string: str,
@@ -169,16 +180,19 @@ class SerpAPI(SearchEngine):
169
180
  f"num_results={num_results}."
170
181
  )
171
182
 
172
- # Setup the parameters
183
+ # Get Google domain and country code
184
+ google_domain = self._get_google_domain(location)
185
+ country_code = location.code
186
+
173
187
  params: Dict[str, str | int] = {
174
188
  "engine": engine,
175
189
  "q": search_string,
176
- "google_domain": f"google.{location.code}",
190
+ "google_domain": google_domain,
177
191
  "location_requested": location.name,
178
192
  "location_used": location.name,
179
- "tbs": f"ctr:{location.code.upper()}",
180
- "cr": f"country{location.code.upper()}",
181
- "gl": location.code,
193
+ "tbs": f"ctr:{country_code.upper()}",
194
+ "cr": f"country{country_code.upper()}",
195
+ "gl": country_code,
182
196
  "hl": language.code,
183
197
  "num": num_results,
184
198
  "api_key": self._api_key,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: fraudcrawler
3
- Version: 0.5.8
3
+ Version: 0.5.9
4
4
  Summary: Intelligent Market Monitoring
5
5
  Home-page: https://github.com/open-veanu/fraudcrawler
6
6
  License: MIT
@@ -1,22 +1,22 @@
1
1
  fraudcrawler/__init__.py,sha256=Kr19jWhtbC1shVoB9fHvBSeoG1IyQB9re1kCZ4YIAi0,842
2
2
  fraudcrawler/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- fraudcrawler/base/base.py,sha256=NOJC12qw-iSkHScPnxFLfzUvg0w57qGaID6OAzHRXeo,8695
3
+ fraudcrawler/base/base.py,sha256=S5VP1PD2tymZMmVcVmP4FI0VlCQxDCB2gp0vVD6eN8g,8543
4
4
  fraudcrawler/base/client.py,sha256=yhkNrhL2SuJXTknLf-8P81fv01FnFMahREZgem-Z-f0,5832
5
5
  fraudcrawler/base/google-languages.json,sha256=z0VtjMCsCcZq11OkCIb9jMDD1p9Ty4lhV7bq4ddYvec,10748
6
6
  fraudcrawler/base/google-locations.json,sha256=UtNu0iSStllvFRTQXMobWKmZR1hKmtgtHftLNgaJTT0,9204345
7
7
  fraudcrawler/base/orchestrator.py,sha256=28X45XLPlJe2hvff8HTLo-V08LNeS0zMWBHe5W3hk4c,27039
8
8
  fraudcrawler/base/retry.py,sha256=9VyVrbYR_0YnfxFhUrvcM3aWCYR6oR4iZE4A3zzVZUs,1015
9
- fraudcrawler/launch_demo_pipeline.py,sha256=j5lu8lLl8QrkVU1MJH25uKtyYk_6lBSeoouCo30aRXg,4634
9
+ fraudcrawler/launch_demo_pipeline.py,sha256=hTzGFQDEwchDSwUx0HgG_TW5h9J7BXM7jn_iB8iI838,4636
10
10
  fraudcrawler/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  fraudcrawler/processing/processor.py,sha256=Qq8QcTlqfnzFi1t-1KkncXxaIszUO7pGK3LXTdHkDnM,7638
12
12
  fraudcrawler/scraping/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  fraudcrawler/scraping/enrich.py,sha256=M4ErLF7q-5EKfEd-dIDS73mZc-aPFp5pJbgdRKCO3f8,13112
14
- fraudcrawler/scraping/search.py,sha256=ZjxOj95ih6o6bOWA0JnBwjFlMzGS-8Sb1P-yvHI5aO0,24957
14
+ fraudcrawler/scraping/search.py,sha256=5fTRo7Tkkz9REU-u50oZu4kbynjIcIigp9HZ0cS_UCg,25510
15
15
  fraudcrawler/scraping/url.py,sha256=5Z3hPW73E-TLhM-Zha8OTcUOumc_rcx64R0fT9z2Hi8,1748
16
16
  fraudcrawler/scraping/zyte.py,sha256=GqvVWA1AWVoClAwd-hQ9iynsT0dOb7R0ntaLK5XVivM,8340
17
17
  fraudcrawler/settings.py,sha256=uwXMOQpuwyWkuMU0asYGtBlL_qJj8F-Xkg4dUaCmDxE,3670
18
- fraudcrawler-0.5.8.dist-info/LICENSE,sha256=B-3FuHfe3S0fWAlKlceskPcRhzXq81g-rJ-ddUYb4O8,1062
19
- fraudcrawler-0.5.8.dist-info/METADATA,sha256=-e9xqpIk0EjO6fqwhmQZ5gsDrl6eJKU7VQdp8MeN0R4,6642
20
- fraudcrawler-0.5.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
21
- fraudcrawler-0.5.8.dist-info/entry_points.txt,sha256=1Befm7cM6945y2AA1z9V4gZV63mtSWcAs7ypvgux_Xg,79
22
- fraudcrawler-0.5.8.dist-info/RECORD,,
18
+ fraudcrawler-0.5.9.dist-info/LICENSE,sha256=B-3FuHfe3S0fWAlKlceskPcRhzXq81g-rJ-ddUYb4O8,1062
19
+ fraudcrawler-0.5.9.dist-info/METADATA,sha256=sMszTwcTVxuMGfRhJJZKLAPShnY4zDU6gShAa6k3tPg,6642
20
+ fraudcrawler-0.5.9.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
21
+ fraudcrawler-0.5.9.dist-info/entry_points.txt,sha256=1Befm7cM6945y2AA1z9V4gZV63mtSWcAs7ypvgux_Xg,79
22
+ fraudcrawler-0.5.9.dist-info/RECORD,,