fraudcrawler 0.5.8__tar.gz → 0.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of fraudcrawler has been flagged as potentially problematic.
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/PKG-INFO +1 -1
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/base.py +3 -8
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/launch_demo_pipeline.py +1 -1
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/scraping/search.py +19 -5
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/pyproject.toml +1 -1
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/LICENSE +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/README.md +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/__init__.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/__init__.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/client.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/google-languages.json +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/google-locations.json +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/orchestrator.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/base/retry.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/processing/__init__.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/processing/processor.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/scraping/__init__.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/scraping/enrich.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/scraping/url.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/scraping/zyte.py +0 -0
- {fraudcrawler-0.5.8 → fraudcrawler-0.5.9}/fraudcrawler/settings.py +0 -0
fraudcrawler/base/base.py

@@ -1,5 +1,6 @@
 import json
 import logging
+import base64
 from pydantic import (
     BaseModel,
     Field,
@@ -9,7 +10,7 @@ from pydantic import (
 from pydantic_settings import BaseSettings
 from urllib.parse import urlparse
 import re
-from typing import Any, Dict, List, TYPE_CHECKING
+from typing import Any, Dict, List
 
 import httpx
 
@@ -23,9 +24,6 @@ from fraudcrawler.settings import (
     DEFAULT_HTTPX_REDIRECTS,
 )
 
-if TYPE_CHECKING:
-    from fraudcrawler.scraping.zyte import ZyteAPI
-
 logger = logging.getLogger(__name__)
 
 # Load google locations and languages
@@ -245,7 +243,7 @@ class DomainUtils:
         hostname = hostname[4:]
         return hostname.lower()
 
-    async def _unblock_url(self, url: str, zyte_api: "ZyteAPI") -> bytes | None:
+    async def _unblock_url(self, url: str, zyte_api: Any) -> bytes | None:
         """Attempts to unblock a URL using Zyte proxy mode when direct access fails.
 
         This method is specifically designed to handle 403 Forbidden errors for domains
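For context: an `if TYPE_CHECKING:` import is evaluated only by static type checkers, never at runtime, which is why 0.5.8 had to annotate the parameter with the string `"ZyteAPI"` (that annotation is reconstructed here from the guarded import above; the diff view truncates the removed line). A minimal sketch of the before/after trade-off:

```python
from typing import TYPE_CHECKING, Any

# 0.5.8 pattern: import the Zyte client only during static type checking and
# refer to it via a forward-reference string at runtime.
if TYPE_CHECKING:
    from fraudcrawler.scraping.zyte import ZyteAPI

async def unblock_url_v058(url: str, zyte_api: "ZyteAPI") -> bytes | None:
    ...

# 0.5.9 pattern: the guarded import is dropped and the parameter becomes Any,
# removing the base.py -> zyte.py import edge at the cost of static type
# safety on this one parameter.
async def unblock_url_v059(url: str, zyte_api: Any) -> bytes | None:
    ...
```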
@@ -263,9 +261,6 @@ class DomainUtils:
             details = await zyte_api.details(url)
 
             if details and "httpResponseBody" in details:
-                # Decode the base64 content
-                import base64
-
                 html_content = base64.b64decode(details["httpResponseBody"])
                 logger.info(f"Successfully unblocked URL using Zyte proxy: {url}")
                 return html_content
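Taken together, the base.py hunks hoist `import base64` to module scope instead of importing it inside the method on every unblock attempt. A minimal sketch of the resulting flow, assuming (as the hunk implies) that `zyte_api.details()` returns a dict whose `"httpResponseBody"` value is base64-encoded:

```python
import base64
import logging
from typing import Any

logger = logging.getLogger(__name__)

async def unblock_url(url: str, zyte_api: Any) -> bytes | None:
    """Fetch a blocked URL through a Zyte-style client and decode the body."""
    details = await zyte_api.details(url)
    if details and "httpResponseBody" in details:
        # base64 is now imported once at module level (0.5.9) rather than
        # inside this branch on every call (0.5.8).
        html_content = base64.b64decode(details["httpResponseBody"])
        logger.info(f"Successfully unblocked URL using Zyte proxy: {url}")
        return html_content
    return None
```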
fraudcrawler/scraping/search.py

@@ -131,6 +131,17 @@ class SerpAPI(SearchEngine):
         search_string += " site:" + " OR site:".join(s for s in sites)
         return search_string
 
+    @staticmethod
+    def _get_google_domain(location: Location) -> str:
+        """Gets the Google domain for the given location if they do not use the default pattern google.tld"""
+        if location.name == "Brazil":
+            return "google.com.br"
+        elif location.name == "United Kingdom":
+            return "google.co.uk"
+        elif location.name == "Argentina":
+            return "google.com.ar"
+        return f"google.{location.code}"
+
     async def _search(
         self,
         search_string: str,
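The new helper special-cases the few locations whose Google domain does not follow the default `google.<country code>` pattern. An equivalent table-driven sketch, using a pared-down, hypothetical `Location` stand-in for fraudcrawler's model:

```python
from dataclasses import dataclass

@dataclass
class Location:
    """Hypothetical stand-in exposing only the two fields the helper reads."""
    name: str
    code: str

# Locations whose Google domain deviates from the google.<code> default.
_DOMAIN_EXCEPTIONS = {
    "Brazil": "google.com.br",
    "United Kingdom": "google.co.uk",
    "Argentina": "google.com.ar",
}

def get_google_domain(location: Location) -> str:
    return _DOMAIN_EXCEPTIONS.get(location.name, f"google.{location.code}")

print(get_google_domain(Location("Switzerland", "ch")))     # -> google.ch
print(get_google_domain(Location("United Kingdom", "uk")))  # -> google.co.uk
```

A dict lookup keeps the exceptions in one place, so adding further non-default domains (Google runs several, e.g. google.co.jp) does not grow an elif chain.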
@@ -169,16 +180,19 @@ class SerpAPI(SearchEngine):
             f"num_results={num_results}."
         )
 
-        #
+        # Get Google domain and country code
+        google_domain = self._get_google_domain(location)
+        country_code = location.code
+
         params: Dict[str, str | int] = {
             "engine": engine,
             "q": search_string,
-            "google_domain": f"google.{location.code}",
+            "google_domain": google_domain,
             "location_requested": location.name,
             "location_used": location.name,
-            "tbs": f"ctr:{location.code.upper()}",
-            "cr": f"country{location.code.upper()}",
-            "gl": location.code,
+            "tbs": f"ctr:{country_code.upper()}",
+            "cr": f"country{country_code.upper()}",
+            "gl": country_code,
             "hl": language.code,
             "num": num_results,
             "api_key": self._api_key,
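For context, here is how the refactored parameters come together in a request. The parameter names mirror the diff; the endpoint URL and response handling are generic SerpAPI usage assumed for illustration, not code from fraudcrawler, and the sketch reuses `Location` and `get_google_domain` from above:

```python
import httpx

async def serpapi_search(api_key: str, query: str, location: Location,
                         language_code: str, num_results: int) -> dict:
    google_domain = get_google_domain(location)
    country_code = location.code
    params: dict[str, str | int] = {
        "engine": "google",
        "q": query,
        "google_domain": google_domain,
        "location_requested": location.name,
        "location_used": location.name,
        "tbs": f"ctr:{country_code.upper()}",    # country-restrict filter
        "cr": f"country{country_code.upper()}",  # country restriction
        "gl": country_code,                      # geolocation
        "hl": language_code,                     # interface language
        "num": num_results,
        "api_key": api_key,
    }
    async with httpx.AsyncClient() as client:
        resp = await client.get("https://serpapi.com/search", params=params)
        resp.raise_for_status()
        return resp.json()
```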